4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensure
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separate is better because:
181 - ExpandNames is left as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
185 The function is allowed to change the self.op attribute so that
186 later methods can no longer worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same time.
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are allowed.
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. If the hook should run on no nodes,
323 an empty list must be returned (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # The API must be kept, so we silence the 'unused argument' and
350 # 'could be a function' pylint warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
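# Illustrative sketch, not part of the original module: how a hypothetical
# instance-level LU would typically combine _ExpandAndLockInstance with
# _LockInstancesNodes. The class name and the primary_only choice below are
# assumptions made purely for documentation.
#
#   class _ExampleInstanceLU(LogicalUnit):
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       # Node locks cannot be computed yet; declare an empty list and ask
#       # for recalculation once the instance lock is held
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes(primary_only=True)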
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklet.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
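# Illustrative sketch, not part of the original module: a minimal tasklet.
# The class name, the extra constructor argument and the use of
# lu.cfg.GetInstanceInfo are assumptions made for this example only.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       if self.lu.cfg.GetInstanceInfo(self.instance_name) is None:
#         raise errors.OpPrereqError("Instance '%s' not known" %
#                                    self.instance_name, errors.ECODE_NOENT)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.instance_name)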
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
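# Illustrative example, assuming the default use_default=True behaviour
# documented above: constants.VALUE_DEFAULT drops a key, other values
# override or extend the old dictionary.
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   --> {"b": 2, "c": 3}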
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of an instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
832 use_default=use_default)
833 utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one from the object.
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one from the object.
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
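# Illustrative usage, not taken from the original module: once an LU has
# narrowed down the nodes it actually works on, the surplus node locks can
# be dropped early. self.instance is an assumed attribute of the caller.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node])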
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
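# Illustrative usage; the concrete field names are assumptions made for this
# example only.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("free_memory"),
#                      selected=self.op.output_fields)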
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024 """Make sure that none of the given parameters is global.
1026 If a global parameter is found, an L{errors.OpPrereqError} exception is
1027 raised. This is used to avoid setting global parameters for individual nodes.
1029 @type params: dictionary
1030 @param params: Parameters to check
1031 @type glob_pars: dictionary
1032 @param glob_pars: Forbidden parameters
1034 @param kind: Kind of parameters (e.g. "node")
1035 @type bad_levels: string
1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1038 @type good_levels: string
1039 @param good_levels: Level(s) at which the parameters are allowed (e.g.
1043 used_globals = glob_pars.intersection(params)
1045 msg = ("The following %s parameters are global and cannot"
1046 " be customized at %s level, please modify them at"
1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
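# Illustrative usage; constants.NDC_GLOBALS as the set of forbidden node
# parameters is an assumption made for this example.
#
#   _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
#                         "cluster or group", "node")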
1052 def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online.
1055 @param lu: the LU on behalf of which we make the check
1056 @param node: the node to check
1057 @param msg: if passed, should be a message to replace the default one
1058 @raise errors.OpPrereqError: if the node is offline
1062 msg = "Can't use offline node"
1063 if lu.cfg.GetNodeInfo(node).offline:
1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1067 def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained.
1070 @param lu: the LU on behalf of which we make the check
1071 @param node: the node to check
1072 @raise errors.OpPrereqError: if the node is drained
1075 if lu.cfg.GetNodeInfo(node).drained:
1076 raise errors.OpPrereqError("Can't use drained node %s" % node,
1080 def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable.
1083 @param lu: the LU on behalf of which we make the check
1084 @param node: the node to check
1085 @raise errors.OpPrereqError: if the node is not vm capable
1088 if not lu.cfg.GetNodeInfo(node).vm_capable:
1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1093 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS.
1096 @param lu: the LU on behalf of which we make the check
1097 @param node: the node to check
1098 @param os_name: the OS to query about
1099 @param force_variant: whether to ignore variant errors
1100 @raise errors.OpPrereqError: if the node is not supporting the OS
1103 result = lu.rpc.call_os_get(node, os_name)
1104 result.Raise("OS '%s' not in supported OS list for node %s" %
1106 prereq=True, ecode=errors.ECODE_INVAL)
1107 if not force_variant:
1108 _CheckOSVariant(result.payload, os_name)
1111 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip.
1114 @type lu: L{LogicalUnit}
1115 @param lu: the LU on behalf of which we make the check
1117 @param node: the node to check
1118 @type secondary_ip: string
1119 @param secondary_ip: the ip to check
1120 @type prereq: boolean
1121 @param prereq: whether to throw a prerequisite or an execute error
1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1127 result.Raise("Failure checking secondary ip on node %s" % node,
1128 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1129 if not result.payload:
1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1131 " please fix and re-run this command" % secondary_ip)
1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1135 raise errors.OpExecError(msg)
1138 def _CheckNodePVs(nresult, exclusive_storage):
1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1143 if pvlist_dict is None:
1144 return (["Can't get PV list from node"], None)
1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1147 # check that ':' is not present in PV names, since it's a
1148 # special character for lvcreate (denotes the range of PEs to
1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1153 (pv.name, pv.vg_name))
1155 if exclusive_storage:
1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1157 errlist.extend(errmsgs)
1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1160 for (pvname, lvlist) in shared_pvs:
1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1162 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1163 (pvname, utils.CommaJoin(lvlist)))
1164 return (errlist, es_pvinfo)
1167 def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret.
1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1175 def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states.
1178 @param lu: the LU on behalf of which we make the check
1179 @param instance: the instance to check
1180 @param msg: if passed, should be a message to replace the default one
1181 @raise errors.OpPrereqError: if the instance is not in the required state
1185 msg = ("can't use instance from outside %s states" %
1186 utils.CommaJoin(req_states))
1187 if instance.admin_state not in req_states:
1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1189 (instance.name, instance.admin_state, msg),
1192 if constants.ADMINST_UP not in req_states:
1193 pnode = instance.primary_node
1194 if not lu.cfg.GetNodeInfo(pnode).offline:
1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1196 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1197 prereq=True, ecode=errors.ECODE_ENVIRON)
1198 if instance.name in ins_l.payload:
1199 raise errors.OpPrereqError("Instance %s is running, %s" %
1200 (instance.name, msg), errors.ECODE_STATE)
1202 lu.LogWarning("Primary node offline, ignoring check that instance"
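# Illustrative usage, not taken from the original module: an LU that requires
# a stopped instance would call something like the following from its
# CheckPrereq (the message text is an assumption).
#
#   _CheckInstanceState(self, self.instance, INSTANCE_DOWN,
#                       msg="cannot modify disks of a running instance")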
1206 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range.
1209 @param name: name of the parameter for which we perform the check
1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1212 @param ipolicy: dictionary containing min, max and std values
1213 @param value: actual value that we want to use
1214 @return: None or element not meeting the criteria
1218 if value in [None, constants.VALUE_AUTO]:
1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1222 if value > max_v or min_v > value:
1224 fqn = "%s/%s" % (name, qualifier)
1227 return ("%s value %s is not in range [%s, %s]" %
1228 (fqn, value, min_v, max_v))
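# Illustrative example with assumed ipolicy bounds (min=128, max=4096 for the
# memory spec): a value outside the range yields an error string roughly of
# the form shown below, while an in-range value or constants.VALUE_AUTO
# returns None.
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
#   --> "memory-size value 8192 is not in range [128, 4096]"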
1232 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1233 nic_count, disk_sizes, spindle_use,
1235 _compute_fn=_ComputeMinMaxSpec):
1236 """Verifies ipolicy against provided specs.
1239 @param ipolicy: The ipolicy
1241 @param mem_size: The memory size
1242 @type cpu_count: int
1243 @param cpu_count: Used cpu cores
1244 @type disk_count: int
1245 @param disk_count: Number of disks used
1246 @type nic_count: int
1247 @param nic_count: Number of nics used
1248 @type disk_sizes: list of ints
1249 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1250 @type spindle_use: int
1251 @param spindle_use: The number of spindles this instance uses
1252 @type disk_template: string
1253 @param disk_template: The disk template of the instance
1254 @param _compute_fn: The compute function (unittest only)
1255 @return: A list of violations, or an empty list if no violations are found
1258 assert disk_count == len(disk_sizes)
1261 (constants.ISPEC_MEM_SIZE, "", mem_size),
1262 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1263 (constants.ISPEC_NIC_COUNT, "", nic_count),
1264 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1265 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1266 for idx, d in enumerate(disk_sizes)]
1267 if disk_template != constants.DT_DISKLESS:
1268 # This check doesn't make sense for diskless instances
1269 test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
1271 allowed_dts = ipolicy[constants.IPOLICY_DTS]
1272 if disk_template not in allowed_dts:
1273 ret.append("Disk template %s is not allowed (allowed templates: %s)" %
1274 (disk_template, utils.CommaJoin(allowed_dts)))
1276 return ret + filter(None,
1277 (_compute_fn(name, qualifier, ipolicy, value)
1278 for (name, qualifier, value) in test_settings))
1281 def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
1282 _compute_fn=_ComputeIPolicySpecViolation):
1283 """Compute if instance meets the specs of ipolicy.
1286 @param ipolicy: The ipolicy to verify against
1287 @type instance: L{objects.Instance}
1288 @param instance: The instance to verify
1289 @type cfg: L{config.ConfigWriter}
1290 @param cfg: Cluster configuration
1291 @param _compute_fn: The function to verify ipolicy (unittest only)
1292 @see: L{_ComputeIPolicySpecViolation}
1295 be_full = cfg.GetClusterInfo().FillBE(instance)
1296 mem_size = be_full[constants.BE_MAXMEM]
1297 cpu_count = be_full[constants.BE_VCPUS]
1298 spindle_use = be_full[constants.BE_SPINDLE_USE]
1299 disk_count = len(instance.disks)
1300 disk_sizes = [disk.size for disk in instance.disks]
1301 nic_count = len(instance.nics)
1302 disk_template = instance.disk_template
1304 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1305 disk_sizes, spindle_use, disk_template)
1308 def _ComputeIPolicyInstanceSpecViolation(
1309 ipolicy, instance_spec, disk_template,
1310 _compute_fn=_ComputeIPolicySpecViolation):
1311 """Compute if instance specs meet the specs of ipolicy.
1314 @param ipolicy: The ipolicy to verify against
1315 @type instance_spec: dict
1316 @param instance_spec: The instance spec to verify
1317 @type disk_template: string
1318 @param disk_template: the disk template of the instance
1319 @param _compute_fn: The function to verify ipolicy (unittest only)
1320 @see: L{_ComputeIPolicySpecViolation}
1323 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1324 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1325 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1326 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1327 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1328 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1330 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1331 disk_sizes, spindle_use, disk_template)
1334 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1336 _compute_fn=_ComputeIPolicyInstanceViolation):
1337 """Compute if instance meets the specs of the new target group.
1339 @param ipolicy: The ipolicy to verify
1340 @param instance: The instance object to verify
1341 @param current_group: The current group of the instance
1342 @param target_group: The new group of the instance
1343 @type cfg: L{config.ConfigWriter}
1344 @param cfg: Cluster configuration
1345 @param _compute_fn: The function to verify ipolicy (unittest only)
1346 @see: L{_ComputeIPolicySpecViolation}
1349 if current_group == target_group:
1352 return _compute_fn(ipolicy, instance, cfg)
1355 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
1356 _compute_fn=_ComputeIPolicyNodeViolation):
1357 """Checks that the target node is correct in terms of instance policy.
1359 @param ipolicy: The ipolicy to verify
1360 @param instance: The instance object to verify
1361 @param node: The new node to relocate
1362 @type cfg: L{config.ConfigWriter}
1363 @param cfg: Cluster configuration
1364 @param ignore: Ignore violations of the ipolicy
1365 @param _compute_fn: The function to verify ipolicy (unittest only)
1366 @see: L{_ComputeIPolicySpecViolation}
1369 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1370 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
1373 msg = ("Instance does not meet target node group's (%s) instance"
1374 " policy: %s") % (node.group, utils.CommaJoin(res))
1378 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1381 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
1382 """Computes the set of instances that would violate the new ipolicy.
1384 @param old_ipolicy: The current (still in-place) ipolicy
1385 @param new_ipolicy: The new (to become) ipolicy
1386 @param instances: List of instances to verify
1387 @type cfg: L{config.ConfigWriter}
1388 @param cfg: Cluster configuration
1389 @return: A list of instances which violate the new ipolicy but
1393 return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
1394 _ComputeViolatingInstances(old_ipolicy, instances, cfg))
1397 def _ExpandItemName(fn, name, kind):
1398 """Expand an item name.
1400 @param fn: the function to use for expansion
1401 @param name: requested item name
1402 @param kind: text description ('Node' or 'Instance')
1403 @return: the resolved (full) name
1404 @raise errors.OpPrereqError: if the item is not found
1407 full_name = fn(name)
1408 if full_name is None:
1409 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1414 def _ExpandNodeName(cfg, name):
1415 """Wrapper over L{_ExpandItemName} for nodes."""
1416 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1419 def _ExpandInstanceName(cfg, name):
1420 """Wrapper over L{_ExpandItemName} for instances."""
1421 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1424 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1426 """Builds network related env variables for hooks
1428 This builds the hook environment from individual variables.
1431 @param name: the name of the network
1432 @type subnet: string
1433 @param subnet: the ipv4 subnet
1434 @type gateway: string
1435 @param gateway: the ipv4 gateway
1436 @type network6: string
1437 @param network6: the ipv6 subnet
1438 @type gateway6: string
1439 @param gateway6: the ipv6 gateway
1440 @type mac_prefix: string
1441 @param mac_prefix: the mac_prefix
1443 @param tags: the tags of the network
1448 env["NETWORK_NAME"] = name
1450 env["NETWORK_SUBNET"] = subnet
1452 env["NETWORK_GATEWAY"] = gateway
1454 env["NETWORK_SUBNET6"] = network6
1456 env["NETWORK_GATEWAY6"] = gateway6
1458 env["NETWORK_MAC_PREFIX"] = mac_prefix
1460 env["NETWORK_TAGS"] = " ".join(tags)
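# Illustrative example, assuming a network named "net1" with an IPv4 subnet
# and gateway only: the resulting environment would contain entries such as
#
#   {"NETWORK_NAME": "net1",
#    "NETWORK_SUBNET": "192.0.2.0/24",
#    "NETWORK_GATEWAY": "192.0.2.1"}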
1465 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1466 minmem, maxmem, vcpus, nics, disk_template, disks,
1467 bep, hvp, hypervisor_name, tags):
1468 """Builds instance related env variables for hooks
1470 This builds the hook environment from individual variables.
1473 @param name: the name of the instance
1474 @type primary_node: string
1475 @param primary_node: the name of the instance's primary node
1476 @type secondary_nodes: list
1477 @param secondary_nodes: list of secondary nodes as strings
1478 @type os_type: string
1479 @param os_type: the name of the instance's OS
1480 @type status: string
1481 @param status: the desired status of the instance
1482 @type minmem: string
1483 @param minmem: the minimum memory size of the instance
1484 @type maxmem: string
1485 @param maxmem: the maximum memory size of the instance
1487 @param vcpus: the count of VCPUs the instance has
1489 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1490 the NICs the instance has
1491 @type disk_template: string
1492 @param disk_template: the disk template of the instance
1494 @param disks: the list of (size, mode) pairs
1496 @param bep: the backend parameters for the instance
1498 @param hvp: the hypervisor parameters for the instance
1499 @type hypervisor_name: string
1500 @param hypervisor_name: the hypervisor for the instance
1502 @param tags: list of instance tags as strings
1504 @return: the hook environment for this instance
1509 "INSTANCE_NAME": name,
1510 "INSTANCE_PRIMARY": primary_node,
1511 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1512 "INSTANCE_OS_TYPE": os_type,
1513 "INSTANCE_STATUS": status,
1514 "INSTANCE_MINMEM": minmem,
1515 "INSTANCE_MAXMEM": maxmem,
1516 # TODO(2.9) remove deprecated "memory" value
1517 "INSTANCE_MEMORY": maxmem,
1518 "INSTANCE_VCPUS": vcpus,
1519 "INSTANCE_DISK_TEMPLATE": disk_template,
1520 "INSTANCE_HYPERVISOR": hypervisor_name,
1523 nic_count = len(nics)
1524 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1527 env["INSTANCE_NIC%d_IP" % idx] = ip
1528 env["INSTANCE_NIC%d_MAC" % idx] = mac
1529 env["INSTANCE_NIC%d_MODE" % idx] = mode
1530 env["INSTANCE_NIC%d_LINK" % idx] = link
1532 nobj = objects.Network.FromDict(netinfo)
1533 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1535 # FIXME: broken network reference: the instance NIC specifies a
1536 # network, but the relevant network entry was not in the config. This
1537 # should be made impossible.
1538 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1539 if mode == constants.NIC_MODE_BRIDGED:
1540 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1544 env["INSTANCE_NIC_COUNT"] = nic_count
1547 disk_count = len(disks)
1548 for idx, (size, mode) in enumerate(disks):
1549 env["INSTANCE_DISK%d_SIZE" % idx] = size
1550 env["INSTANCE_DISK%d_MODE" % idx] = mode
1554 env["INSTANCE_DISK_COUNT"] = disk_count
1559 env["INSTANCE_TAGS"] = " ".join(tags)
1561 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1562 for key, value in source.items():
1563 env["INSTANCE_%s_%s" % (kind, key)] = value
1568 def _NICToTuple(lu, nic):
1569 """Build a tuple of nic information.
1571 @type lu: L{LogicalUnit}
1572 @param lu: the logical unit on whose behalf we execute
1573 @type nic: L{objects.NIC}
1574 @param nic: nic to convert to hooks tuple
1577 cluster = lu.cfg.GetClusterInfo()
1578 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1579 mode = filled_params[constants.NIC_MODE]
1580 link = filled_params[constants.NIC_LINK]
1583 nobj = lu.cfg.GetNetwork(nic.network)
1584 netinfo = objects.Network.ToDict(nobj)
1585 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1588 def _NICListToTuple(lu, nics):
1589 """Build a list of nic information tuples.
1591 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1592 value in LUInstanceQueryData.
1594 @type lu: L{LogicalUnit}
1595 @param lu: the logical unit on whose behalf we execute
1596 @type nics: list of L{objects.NIC}
1597 @param nics: list of nics to convert to hooks tuples
1602 hooks_nics.append(_NICToTuple(lu, nic))
1606 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1607 """Builds instance related env variables for hooks from an object.
1609 @type lu: L{LogicalUnit}
1610 @param lu: the logical unit on whose behalf we execute
1611 @type instance: L{objects.Instance}
1612 @param instance: the instance for which we should build the
1614 @type override: dict
1615 @param override: dictionary with key/values that will override
1618 @return: the hook environment dictionary
1621 cluster = lu.cfg.GetClusterInfo()
1622 bep = cluster.FillBE(instance)
1623 hvp = cluster.FillHV(instance)
1625 "name": instance.name,
1626 "primary_node": instance.primary_node,
1627 "secondary_nodes": instance.secondary_nodes,
1628 "os_type": instance.os,
1629 "status": instance.admin_state,
1630 "maxmem": bep[constants.BE_MAXMEM],
1631 "minmem": bep[constants.BE_MINMEM],
1632 "vcpus": bep[constants.BE_VCPUS],
1633 "nics": _NICListToTuple(lu, instance.nics),
1634 "disk_template": instance.disk_template,
1635 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1638 "hypervisor_name": instance.hypervisor,
1639 "tags": instance.tags,
1642 args.update(override)
1643 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
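# Illustrative usage, not taken from the original module: a typical
# BuildHooksEnv implementation delegates to this helper and optionally adds
# its own values ("FORCE" and self.op.force are assumptions for this sketch).
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     env["FORCE"] = self.op.force
#     return env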
1646 def _AdjustCandidatePool(lu, exceptions):
1647 """Adjust the candidate pool after node operations.
1650 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1652 lu.LogInfo("Promoted nodes to master candidate role: %s",
1653 utils.CommaJoin(node.name for node in mod_list))
1654 for name in mod_list:
1655 lu.context.ReaddNode(name)
1656 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1658 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1662 def _DecideSelfPromotion(lu, exceptions=None):
1663 """Decide whether I should promote myself as a master candidate.
1666 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1667 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1668 # the new node will increase mc_max with one, so:
1669 mc_should = min(mc_should + 1, cp_size)
1670 return mc_now < mc_should
1673 def _ComputeViolatingInstances(ipolicy, instances, cfg):
1674 """Computes the set of instances that violate the given ipolicy.
1676 @param ipolicy: The ipolicy to verify
1677 @type instances: list of L{objects.Instance}
1678 @param instances: List of instances to verify
1679 @type cfg: L{config.ConfigWriter}
1680 @param cfg: Cluster configuration
1681 @return: A frozenset of instance names violating the ipolicy
1684 return frozenset([inst.name for inst in instances
1685 if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
1688 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1689 """Check that the bridges needed by a list of nics exist.
1692 cluster = lu.cfg.GetClusterInfo()
1693 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1694 brlist = [params[constants.NIC_LINK] for params in paramslist
1695 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1697 result = lu.rpc.call_bridges_exist(target_node, brlist)
1698 result.Raise("Error checking bridges on destination node '%s'" %
1699 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1702 def _CheckInstanceBridgesExist(lu, instance, node=None):
1703 """Check that the bridges needed by an instance exist.
1707 node = instance.primary_node
1708 _CheckNicsBridgesExist(lu, instance.nics, node)
1711 def _CheckOSVariant(os_obj, name):
1712 """Check whether an OS name conforms to the OS variants specification.
1714 @type os_obj: L{objects.OS}
1715 @param os_obj: OS object to check
1717 @param name: OS name passed by the user, to check for validity
1720 variant = objects.OS.GetVariant(name)
1721 if not os_obj.supported_variants:
1723 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1724 " passed)" % (os_obj.name, variant),
1728 raise errors.OpPrereqError("OS name must include a variant",
1731 if variant not in os_obj.supported_variants:
1732 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1735 def _GetNodeInstancesInner(cfg, fn):
1736 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1739 def _GetNodeInstances(cfg, node_name):
1740 """Returns a list of all primary and secondary instances on a node.
1744 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1747 def _GetNodePrimaryInstances(cfg, node_name):
1748 """Returns primary instances on a node.
1751 return _GetNodeInstancesInner(cfg,
1752 lambda inst: node_name == inst.primary_node)
1755 def _GetNodeSecondaryInstances(cfg, node_name):
1756 """Returns secondary instances on a node.
1759 return _GetNodeInstancesInner(cfg,
1760 lambda inst: node_name in inst.secondary_nodes)
1763 def _GetStorageTypeArgs(cfg, storage_type):
1764 """Returns the arguments for a storage type.
1767 # Special case for file storage
1768 if storage_type == constants.ST_FILE:
1769 # storage.FileStorage wants a list of storage directories
1770 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1775 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1778 for dev in instance.disks:
1779 cfg.SetDiskID(dev, node_name)
1781 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1783 result.Raise("Failed to get disk status from node %s" % node_name,
1784 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1786 for idx, bdev_status in enumerate(result.payload):
1787 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1793 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1794 """Check the sanity of iallocator and node arguments and use the
1795 cluster-wide iallocator if appropriate.
1797 Check that at most one of (iallocator, node) is specified. If none is
1798 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1799 then the LU's opcode's iallocator slot is filled with the cluster-wide
1802 @type iallocator_slot: string
1803 @param iallocator_slot: the name of the opcode iallocator slot
1804 @type node_slot: string
1805 @param node_slot: the name of the opcode target node slot
1808 node = getattr(lu.op, node_slot, None)
1809 ialloc = getattr(lu.op, iallocator_slot, None)
1813 if node is not None and ialloc is not None:
1814 raise errors.OpPrereqError("Do not specify both iallocator and node",
1816 elif ((node is None and ialloc is None) or
1817 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1818 default_iallocator = lu.cfg.GetDefaultIAllocator()
1819 if default_iallocator:
1820 setattr(lu.op, iallocator_slot, default_iallocator)
1822 raise errors.OpPrereqError("No iallocator or node given and no"
1823 " cluster-wide default iallocator found;"
1824 " please specify either an iallocator or a"
1825 " node, or set a cluster-wide default"
1826 " iallocator", errors.ECODE_INVAL)
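# Illustrative usage; the opcode slot names are assumptions for this example.
# Typically called from CheckArguments of LUs accepting either an iallocator
# or an explicit target node:
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")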
1829 def _GetDefaultIAllocator(cfg, ialloc):
1830 """Decides on which iallocator to use.
1832 @type cfg: L{config.ConfigWriter}
1833 @param cfg: Cluster configuration object
1834 @type ialloc: string or None
1835 @param ialloc: Iallocator specified in opcode
1837 @return: Iallocator name
1841 # Use default iallocator
1842 ialloc = cfg.GetDefaultIAllocator()
1845 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1846 " opcode nor as a cluster-wide default",
1852 def _CheckHostnameSane(lu, name):
1853 """Ensures that a given hostname resolves to a 'sane' name.
1855 The given name is required to be a prefix of the resolved hostname,
1856 to prevent accidental mismatches.
1858 @param lu: the logical unit on behalf of which we're checking
1859 @param name: the name we should resolve and check
1860 @return: the resolved hostname object
1863 hostname = netutils.GetHostname(name=name)
1864 if hostname.name != name:
1865 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1866 if not utils.MatchNameComponent(name, [hostname.name]):
1867 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1868 " same as given hostname '%s'") %
1869 (hostname.name, name), errors.ECODE_INVAL)
1873 class LUClusterPostInit(LogicalUnit):
1874 """Logical unit for running hooks after cluster initialization.
1877 HPATH = "cluster-init"
1878 HTYPE = constants.HTYPE_CLUSTER
1880 def BuildHooksEnv(self):
1885 "OP_TARGET": self.cfg.GetClusterName(),
1888 def BuildHooksNodes(self):
1889 """Build hooks nodes.
1892 return ([], [self.cfg.GetMasterNode()])
1894 def Exec(self, feedback_fn):
1901 class LUClusterDestroy(LogicalUnit):
1902 """Logical unit for destroying the cluster.
1905 HPATH = "cluster-destroy"
1906 HTYPE = constants.HTYPE_CLUSTER
1908 def BuildHooksEnv(self):
1913 "OP_TARGET": self.cfg.GetClusterName(),
1916 def BuildHooksNodes(self):
1917 """Build hooks nodes.
1922 def CheckPrereq(self):
1923 """Check prerequisites.
1925 This checks whether the cluster is empty.
1927 Any errors are signaled by raising errors.OpPrereqError.
1930 master = self.cfg.GetMasterNode()
1932 nodelist = self.cfg.GetNodeList()
1933 if len(nodelist) != 1 or nodelist[0] != master:
1934 raise errors.OpPrereqError("There are still %d node(s) in"
1935 " this cluster." % (len(nodelist) - 1),
1937 instancelist = self.cfg.GetInstanceList()
1939 raise errors.OpPrereqError("There are still %d instance(s) in"
1940 " this cluster." % len(instancelist),
1943 def Exec(self, feedback_fn):
1944 """Destroys the cluster.
1947 master_params = self.cfg.GetMasterNetworkParameters()
1949 # Run post hooks on master node before it's removed
1950 _RunPostHook(self, master_params.name)
1952 ems = self.cfg.GetUseExternalMipScript()
1953 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1956 self.LogWarning("Error disabling the master IP address: %s",
1959 return master_params.name
1962 def _VerifyCertificate(filename):
1963 """Verifies a certificate for L{LUClusterVerifyConfig}.
1965 @type filename: string
1966 @param filename: Path to PEM file
1970 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1971 utils.ReadFile(filename))
1972 except Exception, err: # pylint: disable=W0703
1973 return (LUClusterVerifyConfig.ETYPE_ERROR,
1974 "Failed to load X509 certificate %s: %s" % (filename, err))
1977 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1978 constants.SSL_CERT_EXPIRATION_ERROR)
1981 fnamemsg = "While verifying %s: %s" % (filename, msg)
1986 return (None, fnamemsg)
1987 elif errcode == utils.CERT_WARNING:
1988 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1989 elif errcode == utils.CERT_ERROR:
1990 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1992 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
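# Illustrative sketch (not part of the original module): consuming the
# (errcode, message) pair returned by _VerifyCertificate, as
# LUClusterVerifyConfig.Exec does for every file in pathutils.ALL_CERT_FILES.
#
#   (errcode, msg) = _VerifyCertificate(filename)
#   # errcode is None (certificate fine), ETYPE_WARNING (close to expiration)
#   # or ETYPE_ERROR (unreadable or expired); msg carries the detail text,
#   # if any.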
1995 def _GetAllHypervisorParameters(cluster, instances):
1996 """Compute the set of all hypervisor parameters.
1998 @type cluster: L{objects.Cluster}
1999 @param cluster: the cluster object
2000 @param instances: list of L{objects.Instance}
2001 @param instances: additional instances from which to obtain parameters
2002 @rtype: list of (origin, hypervisor, parameters)
2003 @return: a list with all parameters found, indicating the hypervisor they
2004 apply to, and the origin (can be "cluster", "os X", or "instance Y")
2009 for hv_name in cluster.enabled_hypervisors:
2010 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2012 for os_name, os_hvp in cluster.os_hvp.items():
2013 for hv_name, hv_params in os_hvp.items():
2015 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2016 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2018 # TODO: collapse identical parameter values in a single one
2019 for instance in instances:
2020 if instance.hvparams:
2021 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2022 cluster.FillHV(instance)))
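# Illustrative sketch (not part of the original module): the shape of the
# (origin, hypervisor, parameters) list built by _GetAllHypervisorParameters,
# with made-up names.
#
#   [("cluster", "kvm", {...}),          # cluster-level defaults
#    ("os debian-image", "kvm", {...}),  # defaults overridden per OS
#    ("instance web1", "kvm", {...})]    # fully filled instance parameters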
2027 class _VerifyErrors(object):
2028 """Mix-in for cluster/group verify LUs.
2030 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2031 self.op and self._feedback_fn to be available.)
2035 ETYPE_FIELD = "code"
2036 ETYPE_ERROR = "ERROR"
2037 ETYPE_WARNING = "WARNING"
2039 def _Error(self, ecode, item, msg, *args, **kwargs):
2040 """Format an error message.
2042 Based on the opcode's error_codes parameter, either format a
2043 parseable error code, or a simpler error string.
2045 This must be called only from Exec and functions called from Exec.
2048 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2049 itype, etxt, _ = ecode
2050 # If the error code is in the list of ignored errors, demote the error to a warning
2052 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2053 ltype = self.ETYPE_WARNING
2054 # first complete the msg
2057 # then format the whole message
2058 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2059 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2065 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2066 # and finally report it via the feedback_fn
2067 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2068 # only mark the operation as failed for errors, not for warnings
2069 if ltype == self.ETYPE_ERROR:
2072 def _ErrorIf(self, cond, *args, **kwargs):
2073 """Log an error message if the passed condition is True.
2077 or self.op.debug_simulate_errors): # pylint: disable=E1101
2078 self._Error(*args, **kwargs)
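# Illustrative sketch (not part of the original module): roughly the two
# message formats produced by _VerifyErrors._Error for a node-level error.
#
#   with op.error_codes set:    "ERROR:ENODERPC:node:node1:no data returned"
#   without op.error_codes:     "ERROR: node node1: no data returned"
#   # Error codes listed in op.ignore_errors are demoted to WARNING and do
#   # not mark the operation as failed.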
2081 class LUClusterVerify(NoHooksLU):
2082 """Submits all jobs necessary to verify the cluster.
2087 def ExpandNames(self):
2088 self.needed_locks = {}
2090 def Exec(self, feedback_fn):
2093 if self.op.group_name:
2094 groups = [self.op.group_name]
2095 depends_fn = lambda: None
2097 groups = self.cfg.GetNodeGroupList()
2099 # Verify global configuration
2101 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2104 # Always depend on global verification
2105 depends_fn = lambda: [(-len(jobs), [])]
2108 [opcodes.OpClusterVerifyGroup(group_name=group,
2109 ignore_errors=self.op.ignore_errors,
2110 depends=depends_fn())]
2111 for group in groups)
2113 # Fix up all parameters
2114 for op in itertools.chain(*jobs): # pylint: disable=W0142
2115 op.debug_simulate_errors = self.op.debug_simulate_errors
2116 op.verbose = self.op.verbose
2117 op.error_codes = self.op.error_codes
2119 op.skip_checks = self.op.skip_checks
2120 except AttributeError:
2121 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2123 return ResultWithJobs(jobs)
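# Illustrative sketch (not part of the original module): roughly the job list
# handed to ResultWithJobs above for a cluster with two node groups.
#
#   jobs = [[OpClusterVerifyConfig(...)],
#           [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
#           [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])]]
#   # Each inner list is submitted as one job; the negative relative
#   # dependencies always point back at the global config-verification job.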
2126 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2127 """Verifies the cluster config.
2132 def _VerifyHVP(self, hvp_data):
2133 """Verifies locally the syntax of the hypervisor parameters.
2136 for item, hv_name, hv_params in hvp_data:
2137 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2140 hv_class = hypervisor.GetHypervisorClass(hv_name)
2141 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2142 hv_class.CheckParameterSyntax(hv_params)
2143 except errors.GenericError, err:
2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2146 def ExpandNames(self):
2147 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2148 self.share_locks = _ShareAll()
2150 def CheckPrereq(self):
2151 """Check prerequisites.
2154 # Retrieve all information
2155 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2156 self.all_node_info = self.cfg.GetAllNodesInfo()
2157 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2159 def Exec(self, feedback_fn):
2160 """Verify integrity of cluster, performing various test on nodes.
2164 self._feedback_fn = feedback_fn
2166 feedback_fn("* Verifying cluster config")
2168 for msg in self.cfg.VerifyConfig():
2169 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2171 feedback_fn("* Verifying cluster certificate files")
2173 for cert_filename in pathutils.ALL_CERT_FILES:
2174 (errcode, msg) = _VerifyCertificate(cert_filename)
2175 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2177 feedback_fn("* Verifying hypervisor parameters")
2179 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2180 self.all_inst_info.values()))
2182 feedback_fn("* Verifying all nodes belong to an existing group")
2184 # We do this verification here because, should this bogus circumstance
2185 # occur, it would never be caught by VerifyGroup, which only acts on
2186 # nodes/instances reachable from existing node groups.
2188 dangling_nodes = set(node.name for node in self.all_node_info.values()
2189 if node.group not in self.all_group_info)
2191 dangling_instances = {}
2192 no_node_instances = []
2194 for inst in self.all_inst_info.values():
2195 if inst.primary_node in dangling_nodes:
2196 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2197 elif inst.primary_node not in self.all_node_info:
2198 no_node_instances.append(inst.name)
2203 utils.CommaJoin(dangling_instances.get(node.name,
2205 for node in dangling_nodes]
2207 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2209 "the following nodes (and their instances) belong to a non"
2210 " existing group: %s", utils.CommaJoin(pretty_dangling))
2212 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2214 "the following instances have a non-existing primary-node:"
2215 " %s", utils.CommaJoin(no_node_instances))
2220 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2221 """Verifies the status of a node group.
2224 HPATH = "cluster-verify"
2225 HTYPE = constants.HTYPE_CLUSTER
2228 _HOOKS_INDENT_RE = re.compile("^", re.M)
2230 class NodeImage(object):
2231 """A class representing the logical and physical status of a node.
2234 @ivar name: the node name to which this object refers
2235 @ivar volumes: a structure as returned from
2236 L{ganeti.backend.GetVolumeList} (runtime)
2237 @ivar instances: a list of running instances (runtime)
2238 @ivar pinst: list of configured primary instances (config)
2239 @ivar sinst: list of configured secondary instances (config)
2240 @ivar sbp: dictionary of {primary-node: list of instances} for all
2241 instances for which this node is secondary (config)
2242 @ivar mfree: free memory, as reported by hypervisor (runtime)
2243 @ivar dfree: free disk, as reported by the node (runtime)
2244 @ivar offline: the offline status (config)
2245 @type rpc_fail: boolean
2246 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2247 not whether the individual keys were correct) (runtime)
2248 @type lvm_fail: boolean
2249 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2250 @type hyp_fail: boolean
2251 @ivar hyp_fail: whether the RPC call didn't return the instance list
2252 @type ghost: boolean
2253 @ivar ghost: whether this is a known node or not (config)
2254 @type os_fail: boolean
2255 @ivar os_fail: whether the RPC call didn't return valid OS data
2257 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2258 @type vm_capable: boolean
2259 @ivar vm_capable: whether the node can host instances
2261 @ivar pv_min: size in MiB of the smallest PVs
2263 @ivar pv_max: size in MiB of the biggest PVs
2266 def __init__(self, offline=False, name=None, vm_capable=True):
2275 self.offline = offline
2276 self.vm_capable = vm_capable
2277 self.rpc_fail = False
2278 self.lvm_fail = False
2279 self.hyp_fail = False
2281 self.os_fail = False
2286 def ExpandNames(self):
2287 # This raises errors.OpPrereqError on its own:
2288 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2290 # Get instances in node group; this is unsafe and needs verification later
2292 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2294 self.needed_locks = {
2295 locking.LEVEL_INSTANCE: inst_names,
2296 locking.LEVEL_NODEGROUP: [self.group_uuid],
2297 locking.LEVEL_NODE: [],
2299 # This opcode is run by the watcher every five minutes and acquires all nodes
2300 # for a group. It doesn't run for a long time, so it's better to acquire
2301 # the node allocation lock as well.
2302 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2305 self.share_locks = _ShareAll()
2307 def DeclareLocks(self, level):
2308 if level == locking.LEVEL_NODE:
2309 # Get members of node group; this is unsafe and needs verification later
2310 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2312 all_inst_info = self.cfg.GetAllInstancesInfo()
2314 # In Exec(), we warn about mirrored instances that have primary and
2315 # secondary living in separate node groups. To fully verify that
2316 # volumes for these instances are healthy, we will need to do an
2317 # extra call to their secondaries. We ensure here those nodes will be locked.
2319 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2320 # Important: access only the instances whose lock is owned
2321 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2322 nodes.update(all_inst_info[inst].secondary_nodes)
2324 self.needed_locks[locking.LEVEL_NODE] = nodes
2326 def CheckPrereq(self):
2327 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2328 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2330 group_nodes = set(self.group_info.members)
2332 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2335 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2337 unlocked_instances = \
2338 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2341 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2342 utils.CommaJoin(unlocked_nodes),
2345 if unlocked_instances:
2346 raise errors.OpPrereqError("Missing lock for instances: %s" %
2347 utils.CommaJoin(unlocked_instances),
2350 self.all_node_info = self.cfg.GetAllNodesInfo()
2351 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2353 self.my_node_names = utils.NiceSort(group_nodes)
2354 self.my_inst_names = utils.NiceSort(group_instances)
2356 self.my_node_info = dict((name, self.all_node_info[name])
2357 for name in self.my_node_names)
2359 self.my_inst_info = dict((name, self.all_inst_info[name])
2360 for name in self.my_inst_names)
2362 # We detect here the nodes that will need the extra RPC calls for verifying
2363 # split LV volumes; they should be locked.
2364 extra_lv_nodes = set()
2366 for inst in self.my_inst_info.values():
2367 if inst.disk_template in constants.DTS_INT_MIRROR:
2368 for nname in inst.all_nodes:
2369 if self.all_node_info[nname].group != self.group_uuid:
2370 extra_lv_nodes.add(nname)
2372 unlocked_lv_nodes = \
2373 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2375 if unlocked_lv_nodes:
2376 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2377 utils.CommaJoin(unlocked_lv_nodes),
2379 self.extra_lv_nodes = list(extra_lv_nodes)
2381 def _VerifyNode(self, ninfo, nresult):
2382 """Perform some basic validation on data returned from a node.
2384 - check the result data structure is well formed and has all the required fields
2386 - check ganeti version
2388 @type ninfo: L{objects.Node}
2389 @param ninfo: the node to check
2390 @param nresult: the results from the node
2392 @return: whether overall this call was successful (and we can expect
2393 reasonable values in the response)
2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2399 # main result, nresult should be a non-empty dict
2400 test = not nresult or not isinstance(nresult, dict)
2401 _ErrorIf(test, constants.CV_ENODERPC, node,
2402 "unable to verify node: no data returned")
2406 # compares ganeti version
2407 local_version = constants.PROTOCOL_VERSION
2408 remote_version = nresult.get("version", None)
2409 test = not (remote_version and
2410 isinstance(remote_version, (list, tuple)) and
2411 len(remote_version) == 2)
2412 _ErrorIf(test, constants.CV_ENODERPC, node,
2413 "connection to node returned invalid data")
2417 test = local_version != remote_version[0]
2418 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2419 "incompatible protocol versions: master %s,"
2420 " node %s", local_version, remote_version[0])
2424 # node seems compatible, we can actually try to look into its results
2426 # full package version
2427 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2428 constants.CV_ENODEVERSION, node,
2429 "software version mismatch: master %s, node %s",
2430 constants.RELEASE_VERSION, remote_version[1],
2431 code=self.ETYPE_WARNING)
2433 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2434 if ninfo.vm_capable and isinstance(hyp_result, dict):
2435 for hv_name, hv_result in hyp_result.iteritems():
2436 test = hv_result is not None
2437 _ErrorIf(test, constants.CV_ENODEHV, node,
2438 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2440 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2441 if ninfo.vm_capable and isinstance(hvp_result, list):
2442 for item, hv_name, hv_result in hvp_result:
2443 _ErrorIf(True, constants.CV_ENODEHV, node,
2444 "hypervisor %s parameter verify failure (source %s): %s",
2445 hv_name, item, hv_result)
2447 test = nresult.get(constants.NV_NODESETUP,
2448 ["Missing NODESETUP results"])
2449 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2454 def _VerifyNodeTime(self, ninfo, nresult,
2455 nvinfo_starttime, nvinfo_endtime):
2456 """Check the node time.
2458 @type ninfo: L{objects.Node}
2459 @param ninfo: the node to check
2460 @param nresult: the remote results for the node
2461 @param nvinfo_starttime: the start time of the RPC call
2462 @param nvinfo_endtime: the end time of the RPC call
2466 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2468 ntime = nresult.get(constants.NV_TIME, None)
2470 ntime_merged = utils.MergeTime(ntime)
2471 except (ValueError, TypeError):
2472 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2475 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2476 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2477 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2478 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2482 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2483 "Node time diverges by at least %s from master node time",
2486 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2487 """Check the node LVM results and update info for cross-node checks.
2489 @type ninfo: L{objects.Node}
2490 @param ninfo: the node to check
2491 @param nresult: the remote results for the node
2492 @param vg_name: the configured VG name
2493 @type nimg: L{NodeImage}
2494 @param nimg: node image
2501 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2503 # checks vg existence and size > 20G
2504 vglist = nresult.get(constants.NV_VGLIST, None)
2506 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2508 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2509 constants.MIN_VG_SIZE)
2510 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2513 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2515 self._Error(constants.CV_ENODELVM, node, em)
2516 if pvminmax is not None:
2517 (nimg.pv_min, nimg.pv_max) = pvminmax
2519 def _VerifyGroupLVM(self, node_image, vg_name):
2520 """Check cross-node consistency in LVM.
2522 @type node_image: dict
2523 @param node_image: info about nodes, mapping from node to names to
2524 L{NodeImage} objects
2525 @param vg_name: the configured VG name
2531 # Only exclusive storage needs this kind of check
2532 if not self._exclusive_storage:
2535 # exclusive_storage wants all PVs to have the same size (approximately),
2536 # if the smallest and the biggest ones are okay, everything is fine.
2537 # pv_min is None iff pv_max is None
2538 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2541 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2542 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2543 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2544 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2545 "PV sizes differ too much in the group; smallest (%s MB) is"
2546 " on %s, biggest (%s MB) is on %s",
2547 pvmin, minnode, pvmax, maxnode)
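# Illustrative sketch (not part of the original module): the cross-node PV
# size comparison above, with made-up numbers (sizes in MiB).
#
#   node1 reports pv_min=10240, pv_max=10240
#   node2 reports pv_min=10240, pv_max=20480
#   # The group minimum is 10240 (node1) and the maximum 20480 (node2); if
#   # utils.LvmExclusiveTestBadPvSizes(10240, 20480) judges the spread too
#   # large, CV_EGROUPDIFFERENTPVSIZE is reported for the whole group.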
2549 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2550 """Check the node bridges.
2552 @type ninfo: L{objects.Node}
2553 @param ninfo: the node to check
2554 @param nresult: the remote results for the node
2555 @param bridges: the expected list of bridges
2562 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2564 missing = nresult.get(constants.NV_BRIDGES, None)
2565 test = not isinstance(missing, list)
2566 _ErrorIf(test, constants.CV_ENODENET, node,
2567 "did not return valid bridge information")
2569 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2570 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2572 def _VerifyNodeUserScripts(self, ninfo, nresult):
2573 """Check the results of user scripts presence and executability on the node
2575 @type ninfo: L{objects.Node}
2576 @param ninfo: the node to check
2577 @param nresult: the remote results for the node
2582 test = constants.NV_USERSCRIPTS not in nresult
2583 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2584 "did not return user scripts information")
2586 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2588 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2589 "user scripts not present or not executable: %s" %
2590 utils.CommaJoin(sorted(broken_scripts)))
2592 def _VerifyNodeNetwork(self, ninfo, nresult):
2593 """Check the node network connectivity results.
2595 @type ninfo: L{objects.Node}
2596 @param ninfo: the node to check
2597 @param nresult: the remote results for the node
2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2603 test = constants.NV_NODELIST not in nresult
2604 _ErrorIf(test, constants.CV_ENODESSH, node,
2605 "node hasn't returned node ssh connectivity data")
2607 if nresult[constants.NV_NODELIST]:
2608 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2609 _ErrorIf(True, constants.CV_ENODESSH, node,
2610 "ssh communication with node '%s': %s", a_node, a_msg)
2612 test = constants.NV_NODENETTEST not in nresult
2613 _ErrorIf(test, constants.CV_ENODENET, node,
2614 "node hasn't returned node tcp connectivity data")
2616 if nresult[constants.NV_NODENETTEST]:
2617 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2619 _ErrorIf(True, constants.CV_ENODENET, node,
2620 "tcp communication with node '%s': %s",
2621 anode, nresult[constants.NV_NODENETTEST][anode])
2623 test = constants.NV_MASTERIP not in nresult
2624 _ErrorIf(test, constants.CV_ENODENET, node,
2625 "node hasn't returned node master IP reachability data")
2627 if not nresult[constants.NV_MASTERIP]:
2628 if node == self.master_node:
2629 msg = "the master node cannot reach the master IP (not configured?)"
2631 msg = "cannot reach the master IP"
2632 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2634 def _VerifyInstance(self, instance, inst_config, node_image,
2636 """Verify an instance.
2638 This function checks to see if the required block devices are
2639 available on the instance's node, and that the nodes are in the correct state.
2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 pnode = inst_config.primary_node
2645 pnode_img = node_image[pnode]
2646 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2648 node_vol_should = {}
2649 inst_config.MapLVsByNode(node_vol_should)
2651 cluster = self.cfg.GetClusterInfo()
2652 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2654 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2655 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2656 code=self.ETYPE_WARNING)
2658 for node in node_vol_should:
2659 n_img = node_image[node]
2660 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2661 # ignore missing volumes on offline or broken nodes
2663 for volume in node_vol_should[node]:
2664 test = volume not in n_img.volumes
2665 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2666 "volume %s missing on node %s", volume, node)
2668 if inst_config.admin_state == constants.ADMINST_UP:
2669 test = instance not in pnode_img.instances and not pnode_img.offline
2670 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2671 "instance not running on its primary node %s",
2673 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2674 "instance is marked as running and lives on offline node %s",
2677 diskdata = [(nname, success, status, idx)
2678 for (nname, disks) in diskstatus.items()
2679 for idx, (success, status) in enumerate(disks)]
2681 for nname, success, bdev_status, idx in diskdata:
2682 # the 'ghost node' construction in Exec() ensures that we have a
2684 snode = node_image[nname]
2685 bad_snode = snode.ghost or snode.offline
2686 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2687 not success and not bad_snode,
2688 constants.CV_EINSTANCEFAULTYDISK, instance,
2689 "couldn't retrieve status for disk/%s on %s: %s",
2690 idx, nname, bdev_status)
2691 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2692 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2693 constants.CV_EINSTANCEFAULTYDISK, instance,
2694 "disk/%s on %s is faulty", idx, nname)
2696 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2697 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2698 " primary node failed", instance)
2700 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2701 constants.CV_EINSTANCELAYOUT,
2702 instance, "instance has multiple secondary nodes: %s",
2703 utils.CommaJoin(inst_config.secondary_nodes),
2704 code=self.ETYPE_WARNING)
2706 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2707 # Disk template not compatible with exclusive_storage: no instance
2708 # node should have the flag set
2709 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2710 inst_config.all_nodes)
2711 es_nodes = [n for (n, es) in es_flags.items()
2713 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2714 "instance has template %s, which is not supported on nodes"
2715 " that have exclusive storage set: %s",
2716 inst_config.disk_template, utils.CommaJoin(es_nodes))
2718 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2719 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2720 instance_groups = {}
2722 for node in instance_nodes:
2723 instance_groups.setdefault(self.all_node_info[node].group,
2727 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2728 # Sort so that we always list the primary node first.
2729 for group, nodes in sorted(instance_groups.items(),
2730 key=lambda (_, nodes): pnode in nodes,
2733 self._ErrorIf(len(instance_groups) > 1,
2734 constants.CV_EINSTANCESPLITGROUPS,
2735 instance, "instance has primary and secondary nodes in"
2736 " different groups: %s", utils.CommaJoin(pretty_list),
2737 code=self.ETYPE_WARNING)
2739 inst_nodes_offline = []
2740 for snode in inst_config.secondary_nodes:
2741 s_img = node_image[snode]
2742 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2743 snode, "instance %s, connection to secondary node failed",
2747 inst_nodes_offline.append(snode)
2749 # warn that the instance lives on offline nodes
2750 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2751 "instance has offline secondary node(s) %s",
2752 utils.CommaJoin(inst_nodes_offline))
2753 # ... or ghost/non-vm_capable nodes
2754 for node in inst_config.all_nodes:
2755 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2756 instance, "instance lives on ghost node %s", node)
2757 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2758 instance, "instance lives on non-vm_capable node %s", node)
2760 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2761 """Verify if there are any unknown volumes in the cluster.
2763 The .os, .swap and backup volumes are ignored. All other volumes are
2764 reported as unknown.
2766 @type reserved: L{ganeti.utils.FieldSet}
2767 @param reserved: a FieldSet of reserved volume names
2770 for node, n_img in node_image.items():
2771 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2772 self.all_node_info[node].group != self.group_uuid):
2773 # skip non-healthy nodes
2775 for volume in n_img.volumes:
2776 test = ((node not in node_vol_should or
2777 volume not in node_vol_should[node]) and
2778 not reserved.Matches(volume))
2779 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2780 "volume %s is unknown", volume)
2782 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2783 """Verify N+1 Memory Resilience.
2785 Check that if one single node dies we can still start all the
2786 instances it was primary for.
2789 cluster_info = self.cfg.GetClusterInfo()
2790 for node, n_img in node_image.items():
2791 # This code checks that every node which is now listed as
2792 # secondary has enough memory to host all the instances it is
2793 # secondary for, should a single other node in the cluster fail.
2794 # FIXME: not ready for failover to an arbitrary node
2795 # FIXME: does not support file-backed instances
2796 # WARNING: we currently take into account down instances as well
2797 # as up ones, considering that even if they're down someone
2798 # might want to start them even in the event of a node failure.
2799 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2800 # we're skipping nodes marked offline and nodes in other groups from
2801 # the N+1 warning, since most likely we don't have good memory
2802 # information from them; we already list instances living on such
2803 # nodes, and that's enough warning
2805 #TODO(dynmem): also consider ballooning out other instances
2806 for prinode, instances in n_img.sbp.items():
2808 for instance in instances:
2809 bep = cluster_info.FillBE(instance_cfg[instance])
2810 if bep[constants.BE_AUTO_BALANCE]:
2811 needed_mem += bep[constants.BE_MINMEM]
2812 test = n_img.mfree < needed_mem
2813 self._ErrorIf(test, constants.CV_ENODEN1, node,
2814 "not enough memory to accomodate instance failovers"
2815 " should node %s fail (%dMiB needed, %dMiB available)",
2816 prinode, needed_mem, n_img.mfree)
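# Illustrative sketch (not part of the original module): the per-primary
# accounting done above, with made-up numbers. For a node that is secondary
# for two auto-balanced instances whose primary is "nodeA":
#
#   needed_mem = BE_MINMEM(inst1) + BE_MINMEM(inst2) = 2048 + 4096 = 6144 MiB
#   # If the node only has mfree = 4096 MiB, CV_ENODEN1 is reported: it could
#   # not take over nodeA's instances should nodeA fail.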
2819 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2820 (files_all, files_opt, files_mc, files_vm)):
2821 """Verifies file checksums collected from all nodes.
2823 @param errorif: Callback for reporting errors
2824 @param nodeinfo: List of L{objects.Node} objects
2825 @param master_node: Name of master node
2826 @param all_nvinfo: RPC results
2829 # Define functions determining which nodes to consider for a file
2832 (files_mc, lambda node: (node.master_candidate or
2833 node.name == master_node)),
2834 (files_vm, lambda node: node.vm_capable),
2837 # Build mapping from filename to list of nodes which should have the file
2839 for (files, fn) in files2nodefn:
2841 filenodes = nodeinfo
2843 filenodes = filter(fn, nodeinfo)
2844 nodefiles.update((filename,
2845 frozenset(map(operator.attrgetter("name"), filenodes)))
2846 for filename in files)
2848 assert set(nodefiles) == (files_all | files_mc | files_vm)
2850 fileinfo = dict((filename, {}) for filename in nodefiles)
2851 ignore_nodes = set()
2853 for node in nodeinfo:
2855 ignore_nodes.add(node.name)
2858 nresult = all_nvinfo[node.name]
2860 if nresult.fail_msg or not nresult.payload:
2863 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2864 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2865 for (key, value) in fingerprints.items())
2868 test = not (node_files and isinstance(node_files, dict))
2869 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2870 "Node did not return file checksum data")
2872 ignore_nodes.add(node.name)
2875 # Build per-checksum mapping from filename to nodes having it
2876 for (filename, checksum) in node_files.items():
2877 assert filename in nodefiles
2878 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2880 for (filename, checksums) in fileinfo.items():
2881 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2883 # Nodes having the file
2884 with_file = frozenset(node_name
2885 for nodes in fileinfo[filename].values()
2886 for node_name in nodes) - ignore_nodes
2888 expected_nodes = nodefiles[filename] - ignore_nodes
2890 # Nodes missing file
2891 missing_file = expected_nodes - with_file
2893 if filename in files_opt:
2895 errorif(missing_file and missing_file != expected_nodes,
2896 constants.CV_ECLUSTERFILECHECK, None,
2897 "File %s is optional, but it must exist on all or no"
2898 " nodes (not found on %s)",
2899 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2901 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2902 "File %s is missing from node(s) %s", filename,
2903 utils.CommaJoin(utils.NiceSort(missing_file)))
2905 # Warn if a node has a file it shouldn't
2906 unexpected = with_file - expected_nodes
2908 constants.CV_ECLUSTERFILECHECK, None,
2909 "File %s should not exist on node(s) %s",
2910 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2912 # See if there are multiple versions of the file
2913 test = len(checksums) > 1
2915 variants = ["variant %s on %s" %
2916 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2917 for (idx, (checksum, nodes)) in
2918 enumerate(sorted(checksums.items()))]
2922 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2923 "File %s found with %s different checksums (%s)",
2924 filename, len(checksums), "; ".join(variants))
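# Illustrative sketch (not part of the original module): the intermediate
# structures built by _VerifyFiles, with a hypothetical file name.
#
#   nodefiles = {"/var/lib/ganeti/config.data": frozenset(["node1", "node2"])}
#   fileinfo  = {"/var/lib/ganeti/config.data":
#                  {"0123456789abcdef0123": set(["node1", "node2"])}}
#   # A single checksum shared by all expected nodes means no error; a second
#   # checksum key, or nodes missing from the set, triggers a
#   # CV_ECLUSTERFILECHECK report.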
2926 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2928 """Verifies and the node DRBD status.
2930 @type ninfo: L{objects.Node}
2931 @param ninfo: the node to check
2932 @param nresult: the remote results for the node
2933 @param instanceinfo: the dict of instances
2934 @param drbd_helper: the configured DRBD usermode helper
2935 @param drbd_map: the DRBD map as returned by
2936 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2940 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2943 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2944 test = (helper_result is None)
2945 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2946 "no drbd usermode helper returned")
2948 status, payload = helper_result
2950 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2951 "drbd usermode helper check unsuccessful: %s", payload)
2952 test = status and (payload != drbd_helper)
2953 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2954 "wrong drbd usermode helper: %s", payload)
2956 # compute the DRBD minors
2958 for minor, instance in drbd_map[node].items():
2959 test = instance not in instanceinfo
2960 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2961 "ghost instance '%s' in temporary DRBD map", instance)
2962 # ghost instance should not be running, but otherwise we
2963 # don't give double warnings (both ghost instance and
2964 # unallocated minor in use)
2966 node_drbd[minor] = (instance, False)
2968 instance = instanceinfo[instance]
2969 node_drbd[minor] = (instance.name,
2970 instance.admin_state == constants.ADMINST_UP)
2972 # and now check them
2973 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2974 test = not isinstance(used_minors, (tuple, list))
2975 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2976 "cannot parse drbd status file: %s", str(used_minors))
2978 # we cannot check drbd status
2981 for minor, (iname, must_exist) in node_drbd.items():
2982 test = minor not in used_minors and must_exist
2983 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2984 "drbd minor %d of instance %s is not active", minor, iname)
2985 for minor in used_minors:
2986 test = minor not in node_drbd
2987 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2988 "unallocated drbd minor %d is in use", minor)
2990 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2991 """Builds the node OS structures.
2993 @type ninfo: L{objects.Node}
2994 @param ninfo: the node to check
2995 @param nresult: the remote results for the node
2996 @param nimg: the node image object
3000 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3002 remote_os = nresult.get(constants.NV_OSLIST, None)
3003 test = (not isinstance(remote_os, list) or
3004 not compat.all(isinstance(v, list) and len(v) == 7
3005 for v in remote_os))
3007 _ErrorIf(test, constants.CV_ENODEOS, node,
3008 "node hasn't returned valid OS data")
3017 for (name, os_path, status, diagnose,
3018 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3020 if name not in os_dict:
3023 # parameters is a list of lists instead of list of tuples due to
3024 # JSON lacking a real tuple type, fix it:
3025 parameters = [tuple(v) for v in parameters]
3026 os_dict[name].append((os_path, status, diagnose,
3027 set(variants), set(parameters), set(api_ver)))
3029 nimg.oslist = os_dict
3031 def _VerifyNodeOS(self, ninfo, nimg, base):
3032 """Verifies the node OS list.
3034 @type ninfo: L{objects.Node}
3035 @param ninfo: the node to check
3036 @param nimg: the node image object
3037 @param base: the 'template' node we match against (e.g. from the master)
3041 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3043 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3045 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3046 for os_name, os_data in nimg.oslist.items():
3047 assert os_data, "Empty OS status for OS %s?!" % os_name
3048 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3049 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3050 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3051 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3052 "OS '%s' has multiple entries (first one shadows the rest): %s",
3053 os_name, utils.CommaJoin([v[0] for v in os_data]))
3054 # comparisons with the 'base' image
3055 test = os_name not in base.oslist
3056 _ErrorIf(test, constants.CV_ENODEOS, node,
3057 "Extra OS %s not present on reference node (%s)",
3061 assert base.oslist[os_name], "Base node has empty OS status?"
3062 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3064 # base OS is invalid, skipping
3066 for kind, a, b in [("API version", f_api, b_api),
3067 ("variants list", f_var, b_var),
3068 ("parameters", beautify_params(f_param),
3069 beautify_params(b_param))]:
3070 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3071 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3072 kind, os_name, base.name,
3073 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3075 # check any missing OSes
3076 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3077 _ErrorIf(missing, constants.CV_ENODEOS, node,
3078 "OSes present on reference node %s but missing on this node: %s",
3079 base.name, utils.CommaJoin(missing))
3081 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3082 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3084 @type ninfo: L{objects.Node}
3085 @param ninfo: the node to check
3086 @param nresult: the remote results for the node
3087 @type is_master: bool
3088 @param is_master: Whether node is the master node
3094 (constants.ENABLE_FILE_STORAGE or
3095 constants.ENABLE_SHARED_FILE_STORAGE)):
3097 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3099 # This should never happen
3100 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3101 "Node did not return forbidden file storage paths")
3103 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3104 "Found forbidden file storage paths: %s",
3105 utils.CommaJoin(fspaths))
3107 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3108 constants.CV_ENODEFILESTORAGEPATHS, node,
3109 "Node should not have returned forbidden file storage"
3112 def _VerifyOob(self, ninfo, nresult):
3113 """Verifies out of band functionality of a node.
3115 @type ninfo: L{objects.Node}
3116 @param ninfo: the node to check
3117 @param nresult: the remote results for the node
3121 # We just have to verify the paths on master and/or master candidates
3122 # as the oob helper is invoked on the master
3123 if ((ninfo.master_candidate or ninfo.master_capable) and
3124 constants.NV_OOB_PATHS in nresult):
3125 for path_result in nresult[constants.NV_OOB_PATHS]:
3126 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3128 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3129 """Verifies and updates the node volume data.
3131 This function will update a L{NodeImage}'s internal structures
3132 with data from the remote call.
3134 @type ninfo: L{objects.Node}
3135 @param ninfo: the node to check
3136 @param nresult: the remote results for the node
3137 @param nimg: the node image object
3138 @param vg_name: the configured VG name
3142 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3144 nimg.lvm_fail = True
3145 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3148 elif isinstance(lvdata, basestring):
3149 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3150 utils.SafeEncode(lvdata))
3151 elif not isinstance(lvdata, dict):
3152 _ErrorIf(True, constants.CV_ENODELVM, node,
3153 "rpc call to node failed (lvlist)")
3155 nimg.volumes = lvdata
3156 nimg.lvm_fail = False
3158 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3159 """Verifies and updates the node instance list.
3161 If the listing was successful, then updates this node's instance
3162 list. Otherwise, it marks the RPC call as failed for the instance
3165 @type ninfo: L{objects.Node}
3166 @param ninfo: the node to check
3167 @param nresult: the remote results for the node
3168 @param nimg: the node image object
3171 idata = nresult.get(constants.NV_INSTANCELIST, None)
3172 test = not isinstance(idata, list)
3173 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3174 "rpc call to node failed (instancelist): %s",
3175 utils.SafeEncode(str(idata)))
3177 nimg.hyp_fail = True
3179 nimg.instances = idata
3181 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3182 """Verifies and computes a node information map
3184 @type ninfo: L{objects.Node}
3185 @param ninfo: the node to check
3186 @param nresult: the remote results for the node
3187 @param nimg: the node image object
3188 @param vg_name: the configured VG name
3192 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3194 # try to read free memory (from the hypervisor)
3195 hv_info = nresult.get(constants.NV_HVINFO, None)
3196 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3197 _ErrorIf(test, constants.CV_ENODEHV, node,
3198 "rpc call to node failed (hvinfo)")
3201 nimg.mfree = int(hv_info["memory_free"])
3202 except (ValueError, TypeError):
3203 _ErrorIf(True, constants.CV_ENODERPC, node,
3204 "node returned invalid nodeinfo, check hypervisor")
3206 # FIXME: devise a free space model for file based instances as well
3207 if vg_name is not None:
3208 test = (constants.NV_VGLIST not in nresult or
3209 vg_name not in nresult[constants.NV_VGLIST])
3210 _ErrorIf(test, constants.CV_ENODELVM, node,
3211 "node didn't return data for the volume group '%s'"
3212 " - it is either missing or broken", vg_name)
3215 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3216 except (ValueError, TypeError):
3217 _ErrorIf(True, constants.CV_ENODERPC, node,
3218 "node returned invalid LVM info, check LVM status")
3220 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3221 """Gets per-disk status information for all instances.
3223 @type nodelist: list of strings
3224 @param nodelist: Node names
3225 @type node_image: dict of (name, L{objects.Node})
3226 @param node_image: Node objects
3227 @type instanceinfo: dict of (name, L{objects.Instance})
3228 @param instanceinfo: Instance objects
3229 @rtype: {instance: {node: [(success, payload)]}}
3230 @return: a dictionary of per-instance dictionaries with nodes as
3231 keys and disk information as values; the disk information is a
3232 list of tuples (success, payload)
3235 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3238 node_disks_devonly = {}
3239 diskless_instances = set()
3240 diskless = constants.DT_DISKLESS
3242 for nname in nodelist:
3243 node_instances = list(itertools.chain(node_image[nname].pinst,
3244 node_image[nname].sinst))
3245 diskless_instances.update(inst for inst in node_instances
3246 if instanceinfo[inst].disk_template == diskless)
3247 disks = [(inst, disk)
3248 for inst in node_instances
3249 for disk in instanceinfo[inst].disks]
3252 # No need to collect data
3255 node_disks[nname] = disks
3257 # _AnnotateDiskParams makes already copies of the disks
3259 for (inst, dev) in disks:
3260 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3261 self.cfg.SetDiskID(anno_disk, nname)
3262 devonly.append(anno_disk)
3264 node_disks_devonly[nname] = devonly
3266 assert len(node_disks) == len(node_disks_devonly)
3268 # Collect data from all nodes with disks
3269 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3272 assert len(result) == len(node_disks)
3276 for (nname, nres) in result.items():
3277 disks = node_disks[nname]
3280 # No data from this node
3281 data = len(disks) * [(False, "node offline")]
3284 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3285 "while getting disk information: %s", msg)
3287 # No data from this node
3288 data = len(disks) * [(False, msg)]
3291 for idx, i in enumerate(nres.payload):
3292 if isinstance(i, (tuple, list)) and len(i) == 2:
3295 logging.warning("Invalid result from node %s, entry %d: %s",
3297 data.append((False, "Invalid result from the remote node"))
3299 for ((inst, _), status) in zip(disks, data):
3300 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3302 # Add empty entries for diskless instances.
3303 for inst in diskless_instances:
3304 assert inst not in instdisk
3307 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3308 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3309 compat.all(isinstance(s, (tuple, list)) and
3310 len(s) == 2 for s in statuses)
3311 for inst, nnames in instdisk.items()
3312 for nname, statuses in nnames.items())
3314 instdisk_keys = set(instdisk)
3315 instanceinfo_keys = set(instanceinfo)
3316 assert instdisk_keys == instanceinfo_keys, \
3317 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3318 (instdisk_keys, instanceinfo_keys))
3323 def _SshNodeSelector(group_uuid, all_nodes):
3324 """Create endless iterators for all potential SSH check hosts.
3327 nodes = [node for node in all_nodes
3328 if (node.group != group_uuid and
3330 keyfunc = operator.attrgetter("group")
3332 return map(itertools.cycle,
3333 [sorted(map(operator.attrgetter("name"), names))
3334 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3338 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3339 """Choose which nodes should talk to which other nodes.
3341 We will make nodes contact all nodes in their group, and one node from
3344 @warning: This algorithm has a known issue if one node group is much
3345 smaller than others (e.g. just one node). In such a case all other
3346 nodes will talk to the single node.
3349 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3350 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3352 return (online_nodes,
3353 dict((name, sorted([i.next() for i in sel]))
3354 for name in online_nodes))
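# Illustrative sketch (not part of the original module): roughly what
# _SelectSshCheckNodes returns for a two-node group when one other group
# exists, using hypothetical node names.
#
#   (online_nodes, pairs) = cls._SelectSshCheckNodes(group_nodes, group_uuid,
#                                                    all_nodes)
#   # online_nodes == ["node1", "node2"]
#   # pairs == {"node1": ["othergroup-node1"], "node2": ["othergroup-node2"]}
#   # i.e. every online node gets one target per foreign group, rotated so
#   # the SSH checks are spread across that group's nodes.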
3356 def BuildHooksEnv(self):
3359 Cluster-Verify hooks are run only in the post phase; if they fail, their
3360 output is logged in the verify output and the verification fails.
3364 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3367 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3368 for node in self.my_node_info.values())
3372 def BuildHooksNodes(self):
3373 """Build hooks nodes.
3376 return ([], self.my_node_names)
3378 def Exec(self, feedback_fn):
3379 """Verify integrity of the node group, performing various test on nodes.
3382 # This method has too many local variables. pylint: disable=R0914
3383 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3385 if not self.my_node_names:
3387 feedback_fn("* Empty node group, skipping verification")
3391 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3392 verbose = self.op.verbose
3393 self._feedback_fn = feedback_fn
3395 vg_name = self.cfg.GetVGName()
3396 drbd_helper = self.cfg.GetDRBDHelper()
3397 cluster = self.cfg.GetClusterInfo()
3398 hypervisors = cluster.enabled_hypervisors
3399 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3401 i_non_redundant = [] # Non redundant instances
3402 i_non_a_balanced = [] # Non auto-balanced instances
3403 i_offline = 0 # Count of offline instances
3404 n_offline = 0 # Count of offline nodes
3405 n_drained = 0 # Count of nodes being drained
3406 node_vol_should = {}
3408 # FIXME: verify OS list
3411 filemap = _ComputeAncillaryFiles(cluster, False)
3413 # do local checksums
3414 master_node = self.master_node = self.cfg.GetMasterNode()
3415 master_ip = self.cfg.GetMasterIP()
3417 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3420 if self.cfg.GetUseExternalMipScript():
3421 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3423 node_verify_param = {
3424 constants.NV_FILELIST:
3425 map(vcluster.MakeVirtualPath,
3426 utils.UniqueSequence(filename
3427 for files in filemap
3428 for filename in files)),
3429 constants.NV_NODELIST:
3430 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3431 self.all_node_info.values()),
3432 constants.NV_HYPERVISOR: hypervisors,
3433 constants.NV_HVPARAMS:
3434 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3435 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3436 for node in node_data_list
3437 if not node.offline],
3438 constants.NV_INSTANCELIST: hypervisors,
3439 constants.NV_VERSION: None,
3440 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3441 constants.NV_NODESETUP: None,
3442 constants.NV_TIME: None,
3443 constants.NV_MASTERIP: (master_node, master_ip),
3444 constants.NV_OSLIST: None,
3445 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3446 constants.NV_USERSCRIPTS: user_scripts,
3449 if vg_name is not None:
3450 node_verify_param[constants.NV_VGLIST] = None
3451 node_verify_param[constants.NV_LVLIST] = vg_name
3452 node_verify_param[constants.NV_PVLIST] = [vg_name]
3455 node_verify_param[constants.NV_DRBDLIST] = None
3456 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3458 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3459 # Load file storage paths only from master node
3460 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3463 # FIXME: this needs to be changed per node-group, not cluster-wide
3465 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3466 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3467 bridges.add(default_nicpp[constants.NIC_LINK])
3468 for instance in self.my_inst_info.values():
3469 for nic in instance.nics:
3470 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3471 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3472 bridges.add(full_nic[constants.NIC_LINK])
3475 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3477 # Build our expected cluster state
3478 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3480 vm_capable=node.vm_capable))
3481 for node in node_data_list)
3485 for node in self.all_node_info.values():
3486 path = _SupportsOob(self.cfg, node)
3487 if path and path not in oob_paths:
3488 oob_paths.append(path)
3491 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3493 for instance in self.my_inst_names:
3494 inst_config = self.my_inst_info[instance]
3495 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3498 for nname in inst_config.all_nodes:
3499 if nname not in node_image:
3500 gnode = self.NodeImage(name=nname)
3501 gnode.ghost = (nname not in self.all_node_info)
3502 node_image[nname] = gnode
3504 inst_config.MapLVsByNode(node_vol_should)
3506 pnode = inst_config.primary_node
3507 node_image[pnode].pinst.append(instance)
3509 for snode in inst_config.secondary_nodes:
3510 nimg = node_image[snode]
3511 nimg.sinst.append(instance)
3512 if pnode not in nimg.sbp:
3513 nimg.sbp[pnode] = []
3514 nimg.sbp[pnode].append(instance)
3516 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3517 # The value of exclusive_storage should be the same across the group, so if
3518 # it's True for at least one node, we act as if it were set for all the nodes
3519 self._exclusive_storage = compat.any(es_flags.values())
3520 if self._exclusive_storage:
3521 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3523 # At this point, we have the in-memory data structures complete,
3524 # except for the runtime information, which we'll gather next
3526 # Due to the way our RPC system works, exact response times cannot be
3527 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3528 # time before and after executing the request, we can at least have a time window.
3530 nvinfo_starttime = time.time()
3531 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3533 self.cfg.GetClusterName())
3534 nvinfo_endtime = time.time()
3536 if self.extra_lv_nodes and vg_name is not None:
3538 self.rpc.call_node_verify(self.extra_lv_nodes,
3539 {constants.NV_LVLIST: vg_name},
3540 self.cfg.GetClusterName())
3542 extra_lv_nvinfo = {}
3544 all_drbd_map = self.cfg.ComputeDRBDMap()
3546 feedback_fn("* Gathering disk information (%s nodes)" %
3547 len(self.my_node_names))
3548 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3551 feedback_fn("* Verifying configuration file consistency")
3553 # If not all nodes are being checked, we need to make sure the master node
3554 # and a non-checked vm_capable node are in the list.
3555 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3557 vf_nvinfo = all_nvinfo.copy()
3558 vf_node_info = list(self.my_node_info.values())
3559 additional_nodes = []
3560 if master_node not in self.my_node_info:
3561 additional_nodes.append(master_node)
3562 vf_node_info.append(self.all_node_info[master_node])
3563 # Add the first vm_capable node we find which is not included,
3564 # excluding the master node (which we already have)
3565 for node in absent_nodes:
3566 nodeinfo = self.all_node_info[node]
3567 if (nodeinfo.vm_capable and not nodeinfo.offline and
3568 node != master_node):
3569 additional_nodes.append(node)
3570 vf_node_info.append(self.all_node_info[node])
3572 key = constants.NV_FILELIST
3573 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3574 {key: node_verify_param[key]},
3575 self.cfg.GetClusterName()))
3577 vf_nvinfo = all_nvinfo
3578 vf_node_info = self.my_node_info.values()
3580 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3582 feedback_fn("* Verifying node status")
3586 for node_i in node_data_list:
3588 nimg = node_image[node]
3592 feedback_fn("* Skipping offline node %s" % (node,))
3596 if node == master_node:
3598 elif node_i.master_candidate:
3599 ntype = "master candidate"
3600 elif node_i.drained:
3606 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3608 msg = all_nvinfo[node].fail_msg
3609 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3612 nimg.rpc_fail = True
3615 nresult = all_nvinfo[node].payload
3617 nimg.call_ok = self._VerifyNode(node_i, nresult)
3618 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3619 self._VerifyNodeNetwork(node_i, nresult)
3620 self._VerifyNodeUserScripts(node_i, nresult)
3621 self._VerifyOob(node_i, nresult)
3622 self._VerifyFileStoragePaths(node_i, nresult,
3623 node == master_node)
3626 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3627 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3630 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3631 self._UpdateNodeInstances(node_i, nresult, nimg)
3632 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3633 self._UpdateNodeOS(node_i, nresult, nimg)
3635 if not nimg.os_fail:
3636 if refos_img is None:
3638 self._VerifyNodeOS(node_i, nimg, refos_img)
3639 self._VerifyNodeBridges(node_i, nresult, bridges)
3641 # Check whether all running instances are primary for the node. (This
3642 # can no longer be done from _VerifyInstance below, since some of the
3643 # wrong instances could be from other node groups.)
3644 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3646 for inst in non_primary_inst:
3647 test = inst in self.all_inst_info
3648 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3649 "instance should not run on node %s", node_i.name)
3650 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3651 "node is running unknown instance %s", inst)
3653 self._VerifyGroupLVM(node_image, vg_name)
3655 for node, result in extra_lv_nvinfo.items():
3656 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3657 node_image[node], vg_name)
3659 feedback_fn("* Verifying instance status")
3660 for instance in self.my_inst_names:
3662 feedback_fn("* Verifying instance %s" % instance)
3663 inst_config = self.my_inst_info[instance]
3664 self._VerifyInstance(instance, inst_config, node_image,
3667 # If the instance is non-redundant we cannot survive losing its primary
3668 # node, so we are not N+1 compliant.
3669 if inst_config.disk_template not in constants.DTS_MIRRORED:
3670 i_non_redundant.append(instance)
3672 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3673 i_non_a_balanced.append(instance)
3675 feedback_fn("* Verifying orphan volumes")
3676 reserved = utils.FieldSet(*cluster.reserved_lvs)
3678 # We will get spurious "unknown volume" warnings if any node of this group
3679 # is secondary for an instance whose primary is in another group. To avoid
3680 # them, we find these instances and add their volumes to node_vol_should.
3681 for inst in self.all_inst_info.values():
3682 for secondary in inst.secondary_nodes:
3683 if (secondary in self.my_node_info
3684 and inst.name not in self.my_inst_info):
3685 inst.MapLVsByNode(node_vol_should)
3688 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3690 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3691 feedback_fn("* Verifying N+1 Memory redundancy")
3692 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3694 feedback_fn("* Other Notes")
3696 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3697 % len(i_non_redundant))
3699 if i_non_a_balanced:
3700 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3701 % len(i_non_a_balanced))
3704 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3707 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3710 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3714 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3715 """Analyze the post-hooks' result
3717 This method analyses the hook result, handles it, and sends some
3718 nicely-formatted feedback back to the user.
3720 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3721 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3722 @param hooks_results: the results of the multi-node hooks rpc call
3723     @param feedback_fn: function used to send feedback back to the caller
3724 @param lu_result: previous Exec result
3725 @return: the new Exec result, based on the previous result
3729 # We only really run POST phase hooks, only for non-empty groups,
3730 # and are only interested in their results
3731 if not self.my_node_names:
3734 elif phase == constants.HOOKS_PHASE_POST:
3735 # Used to change hooks' output to proper indentation
3736 feedback_fn("* Hooks Results")
3737 assert hooks_results, "invalid result from hooks"
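      # hooks_results maps each node name to an RPC result whose payload is a
      # list of (script, status, output) tuples, e.g. a payload of
      # [("00-myhook", constants.HKR_SUCCESS, "")] (hook name illustrative)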
3739 for node_name in hooks_results:
3740 res = hooks_results[node_name]
3742 test = msg and not res.offline
3743 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3744 "Communication failure in hooks execution: %s", msg)
3745 if res.offline or msg:
3746 # No need to investigate payload if node is offline or gave
3749 for script, hkr, output in res.payload:
3750 test = hkr == constants.HKR_FAIL
3751 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3752 "Script %s failed, output:", script)
3754 output = self._HOOKS_INDENT_RE.sub(" ", output)
3755 feedback_fn("%s" % output)
3761 class LUClusterVerifyDisks(NoHooksLU):
3762 """Verifies the cluster disks status.
3767 def ExpandNames(self):
3768 self.share_locks = _ShareAll()
3769 self.needed_locks = {
3770 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3773 def Exec(self, feedback_fn):
3774 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3776 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
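    # E.g. with two node groups (names purely illustrative) this returns
    # ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
    #                 [opcodes.OpGroupVerifyDisks(group_name="storage")]]),
    # i.e. one single-opcode job per node group.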
3777 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3778 for group in group_names])
3781 class LUGroupVerifyDisks(NoHooksLU):
3782 """Verifies the status of all disks in a node group.
3787 def ExpandNames(self):
3788 # Raises errors.OpPrereqError on its own if group can't be found
3789 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3791 self.share_locks = _ShareAll()
3792 self.needed_locks = {
3793 locking.LEVEL_INSTANCE: [],
3794 locking.LEVEL_NODEGROUP: [],
3795 locking.LEVEL_NODE: [],
3797       # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3798 # starts one instance of this opcode for every group, which means all
3799 # nodes will be locked for a short amount of time, so it's better to
3800 # acquire the node allocation lock as well.
3801 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3804 def DeclareLocks(self, level):
3805 if level == locking.LEVEL_INSTANCE:
3806 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3808 # Lock instances optimistically, needs verification once node and group
3809 # locks have been acquired
3810 self.needed_locks[locking.LEVEL_INSTANCE] = \
3811 self.cfg.GetNodeGroupInstances(self.group_uuid)
3813 elif level == locking.LEVEL_NODEGROUP:
3814 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3816 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3817 set([self.group_uuid] +
3818 # Lock all groups used by instances optimistically; this requires
3819 # going via the node before it's locked, requiring verification
3822 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3823 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3825 elif level == locking.LEVEL_NODE:
3826       # This will only lock the nodes in the group to be verified which contain
3827       # actual instances
3828 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3829 self._LockInstancesNodes()
3831 # Lock all nodes in group to be verified
3832 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3833 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3834 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3836 def CheckPrereq(self):
3837 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3838 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3839 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3841 assert self.group_uuid in owned_groups
3843 # Check if locked instances are still correct
3844 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3846 # Get instance information
3847 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3849 # Check if node groups for locked instances are still correct
3850 _CheckInstancesNodeGroups(self.cfg, self.instances,
3851 owned_groups, owned_nodes, self.group_uuid)
3853 def Exec(self, feedback_fn):
3854 """Verify integrity of cluster disks.
3856 @rtype: tuple of three items
3857 @return: a tuple of (dict of node-to-node_error, list of instances
3858         which need activate-disks, dict of instance: (node, volume) for
3859         missing volumes)
3863 res_instances = set()
3866 nv_dict = _MapInstanceDisksToNodes(
3867 [inst for inst in self.instances.values()
3868 if inst.admin_state == constants.ADMINST_UP])
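    # nv_dict maps (node, LV name) pairs to instance objects, e.g. (illustrative)
    # {("node1", "xenvg/disk0_data"): <instance inst1>}; whatever is still left
    # in nv_dict after processing the per-node LV lists below is a missing volume.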
3871 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3872 set(self.cfg.GetVmCapableNodeList()))
3874 node_lvs = self.rpc.call_lv_list(nodes, [])
3876 for (node, node_res) in node_lvs.items():
3877 if node_res.offline:
3880 msg = node_res.fail_msg
3882 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3883 res_nodes[node] = msg
3886 for lv_name, (_, _, lv_online) in node_res.payload.items():
3887 inst = nv_dict.pop((node, lv_name), None)
3888 if not (lv_online or inst is None):
3889 res_instances.add(inst)
3891 # any leftover items in nv_dict are missing LVs, let's arrange the data
3893 for key, inst in nv_dict.iteritems():
3894 res_missing.setdefault(inst, []).append(list(key))
3896 return (res_nodes, list(res_instances), res_missing)
3899 class LUClusterRepairDiskSizes(NoHooksLU):
3900 """Verifies the cluster disks sizes.
3905 def ExpandNames(self):
3906 if self.op.instances:
3907 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3908 # Not getting the node allocation lock as only a specific set of
3909 # instances (and their nodes) is going to be acquired
3910 self.needed_locks = {
3911 locking.LEVEL_NODE_RES: [],
3912 locking.LEVEL_INSTANCE: self.wanted_names,
3914 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3916 self.wanted_names = None
3917 self.needed_locks = {
3918 locking.LEVEL_NODE_RES: locking.ALL_SET,
3919 locking.LEVEL_INSTANCE: locking.ALL_SET,
3921         # This opcode acquires the node locks for all instances
3922 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3925 self.share_locks = {
3926 locking.LEVEL_NODE_RES: 1,
3927 locking.LEVEL_INSTANCE: 0,
3928 locking.LEVEL_NODE_ALLOC: 1,
3931 def DeclareLocks(self, level):
3932 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3933 self._LockInstancesNodes(primary_only=True, level=level)
3935 def CheckPrereq(self):
3936 """Check prerequisites.
3938 This only checks the optional instance list against the existing names.
3941 if self.wanted_names is None:
3942 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3944 self.wanted_instances = \
3945 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3947 def _EnsureChildSizes(self, disk):
3948 """Ensure children of the disk have the needed disk size.
3950 This is valid mainly for DRBD8 and fixes an issue where the
3951     children have a smaller disk size.
3953 @param disk: an L{ganeti.objects.Disk} object
3956 if disk.dev_type == constants.LD_DRBD8:
3957 assert disk.children, "Empty children for DRBD8?"
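      # For DRBD8 the first child is the data LV and the second the metadata LV;
      # only the data child has to be grown to the parent's size (hence the
      # note below about recursing on this child only, not on the metadev).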
3958 fchild = disk.children[0]
3959 mismatch = fchild.size < disk.size
3961 self.LogInfo("Child disk has size %d, parent %d, fixing",
3962 fchild.size, disk.size)
3963 fchild.size = disk.size
3965 # and we recurse on this child only, not on the metadev
3966 return self._EnsureChildSizes(fchild) or mismatch
3970 def Exec(self, feedback_fn):
3971 """Verify the size of cluster disks.
3974 # TODO: check child disks too
3975 # TODO: check differences in size between primary/secondary nodes
3977 for instance in self.wanted_instances:
3978 pnode = instance.primary_node
3979 if pnode not in per_node_disks:
3980 per_node_disks[pnode] = []
3981 for idx, disk in enumerate(instance.disks):
3982 per_node_disks[pnode].append((instance, idx, disk))
3984 assert not (frozenset(per_node_disks.keys()) -
3985 self.owned_locks(locking.LEVEL_NODE_RES)), \
3986 "Not owning correct locks"
3987 assert not self.owned_locks(locking.LEVEL_NODE)
3990 for node, dskl in per_node_disks.items():
3991 newl = [v[2].Copy() for v in dskl]
3993 self.cfg.SetDiskID(dsk, node)
3994 result = self.rpc.call_blockdev_getsize(node, newl)
3996 self.LogWarning("Failure in blockdev_getsize call to node"
3997 " %s, ignoring", node)
3999 if len(result.payload) != len(dskl):
4000         logging.warning("Invalid result from node %s: len(dskl)=%d,"
4001 " result.payload=%s", node, len(dskl), result.payload)
4002 self.LogWarning("Invalid result from node %s, ignoring node results",
4005 for ((instance, idx, disk), size) in zip(dskl, result.payload):
4007 self.LogWarning("Disk %d of instance %s did not return size"
4008 " information, ignoring", idx, instance.name)
4010 if not isinstance(size, (int, long)):
4011 self.LogWarning("Disk %d of instance %s did not return valid"
4012 " size information, ignoring", idx, instance.name)
4015 if size != disk.size:
4016 self.LogInfo("Disk %d of instance %s has mismatched size,"
4017 " correcting: recorded %d, actual %d", idx,
4018 instance.name, disk.size, size)
4020 self.cfg.Update(instance, feedback_fn)
4021 changed.append((instance.name, idx, size))
4022 if self._EnsureChildSizes(disk):
4023 self.cfg.Update(instance, feedback_fn)
4024 changed.append((instance.name, idx, disk.size))
4028 class LUClusterRename(LogicalUnit):
4029 """Rename the cluster.
4032 HPATH = "cluster-rename"
4033 HTYPE = constants.HTYPE_CLUSTER
4035 def BuildHooksEnv(self):
4040 "OP_TARGET": self.cfg.GetClusterName(),
4041 "NEW_NAME": self.op.name,
4044 def BuildHooksNodes(self):
4045 """Build hooks nodes.
4048 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4050 def CheckPrereq(self):
4051 """Verify that the passed name is a valid one.
4054 hostname = netutils.GetHostname(name=self.op.name,
4055 family=self.cfg.GetPrimaryIPFamily())
4057 new_name = hostname.name
4058 self.ip = new_ip = hostname.ip
4059 old_name = self.cfg.GetClusterName()
4060 old_ip = self.cfg.GetMasterIP()
4061 if new_name == old_name and new_ip == old_ip:
4062 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4063 " cluster has changed",
4065 if new_ip != old_ip:
4066 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4067 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4068 " reachable on the network" %
4069 new_ip, errors.ECODE_NOTUNIQUE)
4071 self.op.name = new_name
4073 def Exec(self, feedback_fn):
4074 """Rename the cluster.
4077 clustername = self.op.name
4080 # shutdown the master IP
4081 master_params = self.cfg.GetMasterNetworkParameters()
4082 ems = self.cfg.GetUseExternalMipScript()
4083 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4085 result.Raise("Could not disable the master role")
4088 cluster = self.cfg.GetClusterInfo()
4089 cluster.cluster_name = clustername
4090 cluster.master_ip = new_ip
4091 self.cfg.Update(cluster, feedback_fn)
4093 # update the known hosts file
4094 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4095 node_list = self.cfg.GetOnlineNodeList()
4097 node_list.remove(master_params.name)
4100 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4102 master_params.ip = new_ip
4103 result = self.rpc.call_node_activate_master_ip(master_params.name,
4105 msg = result.fail_msg
4107 self.LogWarning("Could not re-enable the master role on"
4108 " the master, please restart manually: %s", msg)
4113 def _ValidateNetmask(cfg, netmask):
4114 """Checks if a netmask is valid.
4116 @type cfg: L{config.ConfigWriter}
4117 @param cfg: The cluster configuration
4119 @param netmask: the netmask to be verified
4120 @raise errors.OpPrereqError: if the validation fails
4123 ip_family = cfg.GetPrimaryIPFamily()
4125 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4126 except errors.ProgrammerError:
4127 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4128 ip_family, errors.ECODE_INVAL)
4129 if not ipcls.ValidateNetmask(netmask):
4130 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4131 (netmask), errors.ECODE_INVAL)
4134 class LUClusterSetParams(LogicalUnit):
4135 """Change the parameters of the cluster.
4138 HPATH = "cluster-modify"
4139 HTYPE = constants.HTYPE_CLUSTER
4142 def CheckArguments(self):
4146 if self.op.uid_pool:
4147 uidpool.CheckUidPool(self.op.uid_pool)
4149 if self.op.add_uids:
4150 uidpool.CheckUidPool(self.op.add_uids)
4152 if self.op.remove_uids:
4153 uidpool.CheckUidPool(self.op.remove_uids)
4155 if self.op.master_netmask is not None:
4156 _ValidateNetmask(self.cfg, self.op.master_netmask)
4158 if self.op.diskparams:
4159 for dt_params in self.op.diskparams.values():
4160 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4162 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4163 except errors.OpPrereqError, err:
4164         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4167 def ExpandNames(self):
4168 # FIXME: in the future maybe other cluster params won't require checking on
4169 # all nodes to be modified.
4170 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4171 # resource locks the right thing, shouldn't it be the BGL instead?
4172 self.needed_locks = {
4173 locking.LEVEL_NODE: locking.ALL_SET,
4174 locking.LEVEL_INSTANCE: locking.ALL_SET,
4175 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4176 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4178 self.share_locks = _ShareAll()
4180 def BuildHooksEnv(self):
4185 "OP_TARGET": self.cfg.GetClusterName(),
4186 "NEW_VG_NAME": self.op.vg_name,
4189 def BuildHooksNodes(self):
4190 """Build hooks nodes.
4193 mn = self.cfg.GetMasterNode()
4196 def CheckPrereq(self):
4197 """Check prerequisites.
4199 This checks whether the given params don't conflict and
4200 if the given volume group is valid.
4203 if self.op.vg_name is not None and not self.op.vg_name:
4204 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4205 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4206 " instances exist", errors.ECODE_INVAL)
4208 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4209 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4210 raise errors.OpPrereqError("Cannot disable drbd helper while"
4211 " drbd-based instances exist",
4214 node_list = self.owned_locks(locking.LEVEL_NODE)
4216     # if vg_name is not None, check the given volume group on all nodes
4218 vglist = self.rpc.call_vg_list(node_list)
4219 for node in node_list:
4220 msg = vglist[node].fail_msg
4222 # ignoring down node
4223 self.LogWarning("Error while gathering data on node %s"
4224 " (ignoring node): %s", node, msg)
4226 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4228 constants.MIN_VG_SIZE)
4230 raise errors.OpPrereqError("Error on node '%s': %s" %
4231 (node, vgstatus), errors.ECODE_ENVIRON)
4233 if self.op.drbd_helper:
4234 # checks given drbd helper on all nodes
4235 helpers = self.rpc.call_drbd_helper(node_list)
4236 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4238 self.LogInfo("Not checking drbd helper on offline node %s", node)
4240 msg = helpers[node].fail_msg
4242 raise errors.OpPrereqError("Error checking drbd helper on node"
4243 " '%s': %s" % (node, msg),
4244 errors.ECODE_ENVIRON)
4245 node_helper = helpers[node].payload
4246 if node_helper != self.op.drbd_helper:
4247 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4248 (node, node_helper), errors.ECODE_ENVIRON)
4250 self.cluster = cluster = self.cfg.GetClusterInfo()
4251 # validate params changes
4252 if self.op.beparams:
4253 objects.UpgradeBeParams(self.op.beparams)
4254 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4255 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4257 if self.op.ndparams:
4258 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4259 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4261 # TODO: we need a more general way to handle resetting
4262 # cluster-level parameters to default values
4263 if self.new_ndparams["oob_program"] == "":
4264 self.new_ndparams["oob_program"] = \
4265 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4267 if self.op.hv_state:
4268 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4269 self.cluster.hv_state_static)
4270 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4271 for hv, values in new_hv_state.items())
4273 if self.op.disk_state:
4274 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4275 self.cluster.disk_state_static)
4276 self.new_disk_state = \
4277 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4278 for name, values in svalues.items()))
4279 for storage, svalues in new_disk_state.items())
4282 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4285 all_instances = self.cfg.GetAllInstancesInfo().values()
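      # For every node group, overlay the new cluster-level ipolicy on the
      # group's own ipolicy and collect the instances that would newly
      # violate the resulting policy.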
4287 for group in self.cfg.GetAllNodeGroupsInfo().values():
4288 instances = frozenset([inst for inst in all_instances
4289 if compat.any(node in group.members
4290 for node in inst.all_nodes)])
4291 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4292 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4293 new = _ComputeNewInstanceViolations(ipol,
4294 new_ipolicy, instances, self.cfg)
4296 violations.update(new)
4299 self.LogWarning("After the ipolicy change the following instances"
4300 " violate them: %s",
4301 utils.CommaJoin(utils.NiceSort(violations)))
4303 if self.op.nicparams:
4304 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4305 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4306 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4309 # check all instances for consistency
4310 for instance in self.cfg.GetAllInstancesInfo().values():
4311 for nic_idx, nic in enumerate(instance.nics):
4312 params_copy = copy.deepcopy(nic.nicparams)
4313 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4315 # check parameter syntax
4317 objects.NIC.CheckParameterSyntax(params_filled)
4318 except errors.ConfigurationError, err:
4319 nic_errors.append("Instance %s, nic/%d: %s" %
4320 (instance.name, nic_idx, err))
4322 # if we're moving instances to routed, check that they have an ip
4323 target_mode = params_filled[constants.NIC_MODE]
4324 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4325 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4326 " address" % (instance.name, nic_idx))
4328 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4329 "\n".join(nic_errors), errors.ECODE_INVAL)
4331 # hypervisor list/parameters
4332 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4333 if self.op.hvparams:
4334 for hv_name, hv_dict in self.op.hvparams.items():
4335 if hv_name not in self.new_hvparams:
4336 self.new_hvparams[hv_name] = hv_dict
4338 self.new_hvparams[hv_name].update(hv_dict)
4340 # disk template parameters
4341 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4342 if self.op.diskparams:
4343 for dt_name, dt_params in self.op.diskparams.items():
4344         if dt_name not in self.new_diskparams:
4345 self.new_diskparams[dt_name] = dt_params
4347 self.new_diskparams[dt_name].update(dt_params)
4349 # os hypervisor parameters
4350 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4352 for os_name, hvs in self.op.os_hvp.items():
4353 if os_name not in self.new_os_hvp:
4354 self.new_os_hvp[os_name] = hvs
4356 for hv_name, hv_dict in hvs.items():
4358 # Delete if it exists
4359 self.new_os_hvp[os_name].pop(hv_name, None)
4360 elif hv_name not in self.new_os_hvp[os_name]:
4361 self.new_os_hvp[os_name][hv_name] = hv_dict
4363 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4366 self.new_osp = objects.FillDict(cluster.osparams, {})
4367 if self.op.osparams:
4368 for os_name, osp in self.op.osparams.items():
4369 if os_name not in self.new_osp:
4370 self.new_osp[os_name] = {}
4372 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4375 if not self.new_osp[os_name]:
4376 # we removed all parameters
4377 del self.new_osp[os_name]
4379 # check the parameter validity (remote check)
4380 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4381 os_name, self.new_osp[os_name])
4383 # changes to the hypervisor list
4384 if self.op.enabled_hypervisors is not None:
4385 self.hv_list = self.op.enabled_hypervisors
4386 for hv in self.hv_list:
4387 # if the hypervisor doesn't already exist in the cluster
4388 # hvparams, we initialize it to empty, and then (in both
4389 # cases) we make sure to fill the defaults, as we might not
4390         # have a complete defaults list if the hypervisor wasn't
4391         # enabled before
4392 if hv not in new_hvp:
4394 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4395 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4397 self.hv_list = cluster.enabled_hypervisors
4399 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4400 # either the enabled list has changed, or the parameters have, validate
4401 for hv_name, hv_params in self.new_hvparams.items():
4402 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4403 (self.op.enabled_hypervisors and
4404 hv_name in self.op.enabled_hypervisors)):
4405 # either this is a new hypervisor, or its parameters have changed
4406 hv_class = hypervisor.GetHypervisorClass(hv_name)
4407 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4408 hv_class.CheckParameterSyntax(hv_params)
4409 _CheckHVParams(self, node_list, hv_name, hv_params)
4412 # no need to check any newly-enabled hypervisors, since the
4413 # defaults have already been checked in the above code-block
4414 for os_name, os_hvp in self.new_os_hvp.items():
4415 for hv_name, hv_params in os_hvp.items():
4416 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4417 # we need to fill in the new os_hvp on top of the actual hv_p
4418 cluster_defaults = self.new_hvparams.get(hv_name, {})
4419 new_osp = objects.FillDict(cluster_defaults, hv_params)
4420 hv_class = hypervisor.GetHypervisorClass(hv_name)
4421 hv_class.CheckParameterSyntax(new_osp)
4422 _CheckHVParams(self, node_list, hv_name, new_osp)
4424 if self.op.default_iallocator:
4425 alloc_script = utils.FindFile(self.op.default_iallocator,
4426 constants.IALLOCATOR_SEARCH_PATH,
4428 if alloc_script is None:
4429 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4430 " specified" % self.op.default_iallocator,
4433 def Exec(self, feedback_fn):
4434 """Change the parameters of the cluster.
4437 if self.op.vg_name is not None:
4438 new_volume = self.op.vg_name
4441 if new_volume != self.cfg.GetVGName():
4442 self.cfg.SetVGName(new_volume)
4444 feedback_fn("Cluster LVM configuration already in desired"
4445 " state, not changing")
4446 if self.op.drbd_helper is not None:
4447 new_helper = self.op.drbd_helper
4450 if new_helper != self.cfg.GetDRBDHelper():
4451 self.cfg.SetDRBDHelper(new_helper)
4453 feedback_fn("Cluster DRBD helper already in desired state,"
4455 if self.op.hvparams:
4456 self.cluster.hvparams = self.new_hvparams
4458 self.cluster.os_hvp = self.new_os_hvp
4459 if self.op.enabled_hypervisors is not None:
4460 self.cluster.hvparams = self.new_hvparams
4461 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4462 if self.op.beparams:
4463 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4464 if self.op.nicparams:
4465 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4467 self.cluster.ipolicy = self.new_ipolicy
4468 if self.op.osparams:
4469 self.cluster.osparams = self.new_osp
4470 if self.op.ndparams:
4471 self.cluster.ndparams = self.new_ndparams
4472 if self.op.diskparams:
4473 self.cluster.diskparams = self.new_diskparams
4474 if self.op.hv_state:
4475 self.cluster.hv_state_static = self.new_hv_state
4476 if self.op.disk_state:
4477 self.cluster.disk_state_static = self.new_disk_state
4479 if self.op.candidate_pool_size is not None:
4480 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4481 # we need to update the pool size here, otherwise the save will fail
4482 _AdjustCandidatePool(self, [])
4484 if self.op.maintain_node_health is not None:
4485 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4486 feedback_fn("Note: CONFD was disabled at build time, node health"
4487 " maintenance is not useful (still enabling it)")
4488 self.cluster.maintain_node_health = self.op.maintain_node_health
4490 if self.op.prealloc_wipe_disks is not None:
4491 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4493 if self.op.add_uids is not None:
4494 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4496 if self.op.remove_uids is not None:
4497 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4499 if self.op.uid_pool is not None:
4500 self.cluster.uid_pool = self.op.uid_pool
4502 if self.op.default_iallocator is not None:
4503 self.cluster.default_iallocator = self.op.default_iallocator
4505 if self.op.reserved_lvs is not None:
4506 self.cluster.reserved_lvs = self.op.reserved_lvs
4508 if self.op.use_external_mip_script is not None:
4509 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4511 def helper_os(aname, mods, desc):
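      # mods is a list of (action, OS name) pairs where action is
      # constants.DDM_ADD or constants.DDM_REMOVE, e.g.
      # [(constants.DDM_ADD, "lenny-image")] (OS name illustrative)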
4513 lst = getattr(self.cluster, aname)
4514 for key, val in mods:
4515 if key == constants.DDM_ADD:
4517 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4520 elif key == constants.DDM_REMOVE:
4524 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4526 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4528 if self.op.hidden_os:
4529 helper_os("hidden_os", self.op.hidden_os, "hidden")
4531 if self.op.blacklisted_os:
4532 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4534 if self.op.master_netdev:
4535 master_params = self.cfg.GetMasterNetworkParameters()
4536 ems = self.cfg.GetUseExternalMipScript()
4537 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4538 self.cluster.master_netdev)
4539 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4541 result.Raise("Could not disable the master ip")
4542 feedback_fn("Changing master_netdev from %s to %s" %
4543 (master_params.netdev, self.op.master_netdev))
4544 self.cluster.master_netdev = self.op.master_netdev
4546 if self.op.master_netmask:
4547 master_params = self.cfg.GetMasterNetworkParameters()
4548 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4549 result = self.rpc.call_node_change_master_netmask(master_params.name,
4550 master_params.netmask,
4551 self.op.master_netmask,
4553 master_params.netdev)
4555 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4558 self.cluster.master_netmask = self.op.master_netmask
4560 self.cfg.Update(self.cluster, feedback_fn)
4562 if self.op.master_netdev:
4563 master_params = self.cfg.GetMasterNetworkParameters()
4564 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4565 self.op.master_netdev)
4566 ems = self.cfg.GetUseExternalMipScript()
4567 result = self.rpc.call_node_activate_master_ip(master_params.name,
4570 self.LogWarning("Could not re-enable the master ip on"
4571 " the master, please restart manually: %s",
4575 def _UploadHelper(lu, nodes, fname):
4576 """Helper for uploading a file and showing warnings.
4579 if os.path.exists(fname):
4580 result = lu.rpc.call_upload_file(nodes, fname)
4581 for to_node, to_result in result.items():
4582 msg = to_result.fail_msg
4584 msg = ("Copy of file %s to node %s failed: %s" %
4585 (fname, to_node, msg))
4589 def _ComputeAncillaryFiles(cluster, redist):
4590 """Compute files external to Ganeti which need to be consistent.
4592 @type redist: boolean
4593 @param redist: Whether to include files which need to be redistributed
4596 # Compute files for all nodes
4598 pathutils.SSH_KNOWN_HOSTS_FILE,
4599 pathutils.CONFD_HMAC_KEY,
4600 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4601 pathutils.SPICE_CERT_FILE,
4602 pathutils.SPICE_CACERT_FILE,
4603 pathutils.RAPI_USERS_FILE,
4607 # we need to ship at least the RAPI certificate
4608 files_all.add(pathutils.RAPI_CERT_FILE)
4610 files_all.update(pathutils.ALL_CERT_FILES)
4611 files_all.update(ssconf.SimpleStore().GetFileList())
4613 if cluster.modify_etc_hosts:
4614 files_all.add(pathutils.ETC_HOSTS)
4616 if cluster.use_external_mip_script:
4617 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4619 # Files which are optional, these must:
4620 # - be present in one other category as well
4621 # - either exist or not exist on all nodes of that category (mc, vm all)
4623 pathutils.RAPI_USERS_FILE,
4626 # Files which should only be on master candidates
4630 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4634 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4635 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4636 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4638 # Files which should only be on VM-capable nodes
4641 for hv_name in cluster.enabled_hypervisors
4643 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4647 for hv_name in cluster.enabled_hypervisors
4649 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4651 # Filenames in each category must be unique
4652 all_files_set = files_all | files_mc | files_vm
4653 assert (len(all_files_set) ==
4654 sum(map(len, [files_all, files_mc, files_vm]))), \
4655 "Found file listed in more than one file list"
4657 # Optional files must be present in one other category
4658 assert all_files_set.issuperset(files_opt), \
4659 "Optional file not in a different required list"
4661 # This one file should never ever be re-distributed via RPC
4662 assert not (redist and
4663 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
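  # Sketch of the returned tuple (contents illustrative, not exhaustive):
  #   (files_all e.g. {known_hosts, confd HMAC key, SPICE certs, ...},
  #    files_opt e.g. {RAPI users file},
  #    files_mc  e.g. {cluster config file},
  #    files_vm  e.g. per-hypervisor ancillary files)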
4665 return (files_all, files_opt, files_mc, files_vm)
4668 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4669 """Distribute additional files which are part of the cluster configuration.
4671 ConfigWriter takes care of distributing the config and ssconf files, but
4672 there are more files which should be distributed to all nodes. This function
4673 makes sure those are copied.
4675 @param lu: calling logical unit
4676 @param additional_nodes: list of nodes not in the config to distribute to
4677 @type additional_vm: boolean
4678 @param additional_vm: whether the additional nodes are vm-capable or not
4681 # Gather target nodes
4682 cluster = lu.cfg.GetClusterInfo()
4683 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4685 online_nodes = lu.cfg.GetOnlineNodeList()
4686 online_set = frozenset(online_nodes)
4687 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4689 if additional_nodes is not None:
4690 online_nodes.extend(additional_nodes)
4692 vm_nodes.extend(additional_nodes)
4694 # Never distribute to master node
4695 for nodelist in [online_nodes, vm_nodes]:
4696 if master_info.name in nodelist:
4697 nodelist.remove(master_info.name)
4700 (files_all, _, files_mc, files_vm) = \
4701 _ComputeAncillaryFiles(cluster, True)
4703 # Never re-distribute configuration file from here
4704 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4705 pathutils.CLUSTER_CONF_FILE in files_vm)
4706 assert not files_mc, "Master candidates not handled in this function"
4709 (online_nodes, files_all),
4710 (vm_nodes, files_vm),
4714 for (node_list, files) in filemap:
4716 _UploadHelper(lu, node_list, fname)
4719 class LUClusterRedistConf(NoHooksLU):
4720 """Force the redistribution of cluster configuration.
4722 This is a very simple LU.
4727 def ExpandNames(self):
4728 self.needed_locks = {
4729 locking.LEVEL_NODE: locking.ALL_SET,
4730 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4732 self.share_locks = _ShareAll()
4734 def Exec(self, feedback_fn):
4735 """Redistribute the configuration.
4738 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4739 _RedistributeAncillaryFiles(self)
4742 class LUClusterActivateMasterIp(NoHooksLU):
4743 """Activate the master IP on the master node.
4746 def Exec(self, feedback_fn):
4747 """Activate the master IP.
4750 master_params = self.cfg.GetMasterNetworkParameters()
4751 ems = self.cfg.GetUseExternalMipScript()
4752 result = self.rpc.call_node_activate_master_ip(master_params.name,
4754 result.Raise("Could not activate the master IP")
4757 class LUClusterDeactivateMasterIp(NoHooksLU):
4758 """Deactivate the master IP on the master node.
4761 def Exec(self, feedback_fn):
4762 """Deactivate the master IP.
4765 master_params = self.cfg.GetMasterNetworkParameters()
4766 ems = self.cfg.GetUseExternalMipScript()
4767 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4769 result.Raise("Could not deactivate the master IP")
4772 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4773 """Sleep and poll for an instance's disk to sync.
4776   if not instance.disks or (disks is not None and not disks):
4779 disks = _ExpandCheckDisks(instance, disks)
4782 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4784 node = instance.primary_node
4787 lu.cfg.SetDiskID(dev, node)
4789 # TODO: Convert to utils.Retry
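  # Polling sketch: query the primary node for the mirror status of all disks,
  # log per-device progress, then sleep up to min(60, estimated_time) seconds
  # and repeat; if the disks look done but still degraded, a few short retries
  # are made to rule out a transient state.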
4792 degr_retries = 10 # in seconds, as we sleep 1 second each time
4796 cumul_degraded = False
4797 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4798 msg = rstats.fail_msg
4800 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4803 raise errors.RemoteError("Can't contact node %s for mirror data,"
4804 " aborting." % node)
4807 rstats = rstats.payload
4809 for i, mstat in enumerate(rstats):
4811 lu.LogWarning("Can't compute data for node %s/%s",
4812 node, disks[i].iv_name)
4815 cumul_degraded = (cumul_degraded or
4816 (mstat.is_degraded and mstat.sync_percent is None))
4817 if mstat.sync_percent is not None:
4819 if mstat.estimated_time is not None:
4820 rem_time = ("%s remaining (estimated)" %
4821 utils.FormatSeconds(mstat.estimated_time))
4822 max_time = mstat.estimated_time
4824 rem_time = "no time estimate"
4825 lu.LogInfo("- device %s: %5.2f%% done, %s",
4826 disks[i].iv_name, mstat.sync_percent, rem_time)
4828 # if we're done but degraded, let's do a few small retries, to
4829 # make sure we see a stable and not transient situation; therefore
4830 # we force restart of the loop
4831 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4832 logging.info("Degraded disks found, %d retries left", degr_retries)
4840 time.sleep(min(60, max_time))
4843 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4845 return not cumul_degraded
4848 def _BlockdevFind(lu, node, dev, instance):
4849 """Wrapper around call_blockdev_find to annotate diskparams.
4851 @param lu: A reference to the lu object
4852 @param node: The node to call out
4853 @param dev: The device to find
4854 @param instance: The instance object the device belongs to
4855 @returns The result of the rpc call
4858 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4859 return lu.rpc.call_blockdev_find(node, disk)
4862 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4863 """Wrapper around L{_CheckDiskConsistencyInner}.
4866 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4867 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4871 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4873 """Check that mirrors are not degraded.
4875 @attention: The device has to be annotated already.
4877 The ldisk parameter, if True, will change the test from the
4878 is_degraded attribute (which represents overall non-ok status for
4879 the device(s)) to the ldisk (representing the local storage status).
4882 lu.cfg.SetDiskID(dev, node)
4886 if on_primary or dev.AssembleOnSecondary():
4887 rstats = lu.rpc.call_blockdev_find(node, dev)
4888 msg = rstats.fail_msg
4890 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4892 elif not rstats.payload:
4893 lu.LogWarning("Can't find disk on node %s", node)
4897 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4899 result = result and not rstats.payload.is_degraded
4902 for child in dev.children:
4903 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4909 class LUOobCommand(NoHooksLU):
4910 """Logical unit for OOB handling.
4914 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4916 def ExpandNames(self):
4917 """Gather locks we need.
4920 if self.op.node_names:
4921 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4922 lock_names = self.op.node_names
4924 lock_names = locking.ALL_SET
4926 self.needed_locks = {
4927 locking.LEVEL_NODE: lock_names,
4930 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4932 if not self.op.node_names:
4933 # Acquire node allocation lock only if all nodes are affected
4934 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4936 def CheckPrereq(self):
4937 """Check prerequisites.
4940 - the node exists in the configuration
4943 Any errors are signaled by raising errors.OpPrereqError.
4947 self.master_node = self.cfg.GetMasterNode()
4949 assert self.op.power_delay >= 0.0
4951 if self.op.node_names:
4952 if (self.op.command in self._SKIP_MASTER and
4953 self.master_node in self.op.node_names):
4954 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4955 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4957 if master_oob_handler:
4958 additional_text = ("run '%s %s %s' if you want to operate on the"
4959 " master regardless") % (master_oob_handler,
4963 additional_text = "it does not support out-of-band operations"
4965 raise errors.OpPrereqError(("Operating on the master node %s is not"
4966 " allowed for %s; %s") %
4967 (self.master_node, self.op.command,
4968 additional_text), errors.ECODE_INVAL)
4970 self.op.node_names = self.cfg.GetNodeList()
4971 if self.op.command in self._SKIP_MASTER:
4972 self.op.node_names.remove(self.master_node)
4974 if self.op.command in self._SKIP_MASTER:
4975 assert self.master_node not in self.op.node_names
4977 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4979 raise errors.OpPrereqError("Node %s not found" % node_name,
4982 self.nodes.append(node)
4984 if (not self.op.ignore_status and
4985 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4986 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4987 " not marked offline") % node_name,
4990 def Exec(self, feedback_fn):
4991 """Execute OOB and return result if we expect any.
4994 master_node = self.master_node
4997 for idx, node in enumerate(utils.NiceSort(self.nodes,
4998 key=lambda node: node.name)):
4999 node_entry = [(constants.RS_NORMAL, node.name)]
5000 ret.append(node_entry)
5002 oob_program = _SupportsOob(self.cfg, node)
5005 node_entry.append((constants.RS_UNAVAIL, None))
5008 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5009 self.op.command, oob_program, node.name)
5010 result = self.rpc.call_run_oob(master_node, oob_program,
5011 self.op.command, node.name,
5015 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5016 node.name, result.fail_msg)
5017 node_entry.append((constants.RS_NODATA, None))
5020 self._CheckPayload(result)
5021 except errors.OpExecError, err:
5022 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5024 node_entry.append((constants.RS_NODATA, None))
5026 if self.op.command == constants.OOB_HEALTH:
5027 # For health we should log important events
5028 for item, status in result.payload:
5029 if status in [constants.OOB_STATUS_WARNING,
5030 constants.OOB_STATUS_CRITICAL]:
5031 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5032 item, node.name, status)
5034 if self.op.command == constants.OOB_POWER_ON:
5036 elif self.op.command == constants.OOB_POWER_OFF:
5037 node.powered = False
5038 elif self.op.command == constants.OOB_POWER_STATUS:
5039 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5040 if powered != node.powered:
5041 logging.warning(("Recorded power state (%s) of node '%s' does not"
5042 " match actual power state (%s)"), node.powered,
5045 # For configuration changing commands we should update the node
5046 if self.op.command in (constants.OOB_POWER_ON,
5047 constants.OOB_POWER_OFF):
5048 self.cfg.Update(node, feedback_fn)
5050 node_entry.append((constants.RS_NORMAL, result.payload))
5052 if (self.op.command == constants.OOB_POWER_ON and
5053 idx < len(self.nodes) - 1):
5054 time.sleep(self.op.power_delay)
5058 def _CheckPayload(self, result):
5059 """Checks if the payload is valid.
5061 @param result: RPC result
5062 @raises errors.OpExecError: If payload is not valid
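    Expected payloads (illustrative): C{health} returns a list of
    (item, status) pairs, C{power-status} a dict keyed by
    C{constants.OOB_POWER_STATUS_POWERED}, and the power on/off/cycle
    commands no payload at all.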
5066 if self.op.command == constants.OOB_HEALTH:
5067 if not isinstance(result.payload, list):
5068 errs.append("command 'health' is expected to return a list but got %s" %
5069 type(result.payload))
5071 for item, status in result.payload:
5072 if status not in constants.OOB_STATUSES:
5073 errs.append("health item '%s' has invalid status '%s'" %
5076 if self.op.command == constants.OOB_POWER_STATUS:
5077 if not isinstance(result.payload, dict):
5078 errs.append("power-status is expected to return a dict but got %s" %
5079 type(result.payload))
5081 if self.op.command in [
5082 constants.OOB_POWER_ON,
5083 constants.OOB_POWER_OFF,
5084 constants.OOB_POWER_CYCLE,
5086 if result.payload is not None:
5087 errs.append("%s is expected to not return payload but got '%s'" %
5088 (self.op.command, result.payload))
5091 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5092 utils.CommaJoin(errs))
5095 class _OsQuery(_QueryBase):
5096 FIELDS = query.OS_FIELDS
5098 def ExpandNames(self, lu):
5099 # Lock all nodes in shared mode
5100 # Temporary removal of locks, should be reverted later
5101 # TODO: reintroduce locks when they are lighter-weight
5102 lu.needed_locks = {}
5103 #self.share_locks[locking.LEVEL_NODE] = 1
5104 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5106 # The following variables interact with _QueryBase._GetNames
5108 self.wanted = self.names
5110 self.wanted = locking.ALL_SET
5112 self.do_locking = self.use_locking
5114 def DeclareLocks(self, lu, level):
5118 def _DiagnoseByOS(rlist):
5119 """Remaps a per-node return list into an a per-os per-node dictionary
5121 @param rlist: a map with node names as keys and OS objects as values
5124 @return: a dictionary with osnames as keys and as value another
5125 map, with nodes as keys and tuples of (path, status, diagnose,
5126 variants, parameters, api_versions) as values, eg::
5128 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5129 (/srv/..., False, "invalid api")],
5130 "node2": [(/srv/..., True, "", [], [])]}
5135 # we build here the list of nodes that didn't fail the RPC (at RPC
5136 # level), so that nodes with a non-responding node daemon don't
5137 # make all OSes invalid
5138 good_nodes = [node_name for node_name in rlist
5139 if not rlist[node_name].fail_msg]
5140 for node_name, nr in rlist.items():
5141 if nr.fail_msg or not nr.payload:
5143 for (name, path, status, diagnose, variants,
5144 params, api_versions) in nr.payload:
5145 if name not in all_os:
5146 # build a list of nodes for this os containing empty lists
5147 # for each node in node_list
5149 for nname in good_nodes:
5150 all_os[name][nname] = []
5151 # convert params from [name, help] to (name, help)
5152 params = [tuple(v) for v in params]
5153 all_os[name][node_name].append((path, status, diagnose,
5154 variants, params, api_versions))
5157 def _GetQueryData(self, lu):
5158 """Computes the list of nodes and their attributes.
5161 # Locking is not used
5162 assert not (compat.any(lu.glm.is_owned(level)
5163 for level in locking.LEVELS
5164 if level != locking.LEVEL_CLUSTER) or
5165 self.do_locking or self.use_locking)
5167 valid_nodes = [node.name
5168 for node in lu.cfg.GetAllNodesInfo().values()
5169 if not node.offline and node.vm_capable]
5170 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5171 cluster = lu.cfg.GetClusterInfo()
5175 for (os_name, os_data) in pol.items():
5176 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5177 hidden=(os_name in cluster.hidden_os),
5178 blacklisted=(os_name in cluster.blacklisted_os))
5182 api_versions = set()
5184 for idx, osl in enumerate(os_data.values()):
5185 info.valid = bool(info.valid and osl and osl[0][1])
5189 (node_variants, node_params, node_api) = osl[0][3:6]
5192 variants.update(node_variants)
5193 parameters.update(node_params)
5194 api_versions.update(node_api)
5196 # Filter out inconsistent values
5197 variants.intersection_update(node_variants)
5198 parameters.intersection_update(node_params)
5199 api_versions.intersection_update(node_api)
5201 info.variants = list(variants)
5202 info.parameters = list(parameters)
5203 info.api_versions = list(api_versions)
5205 data[os_name] = info
5207 # Prepare data in requested order
5208 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5212 class LUOsDiagnose(NoHooksLU):
5213 """Logical unit for OS diagnose/query.
5219 def _BuildFilter(fields, names):
5220 """Builds a filter for querying OSes.
5223 name_filter = qlang.MakeSimpleFilter("name", names)
5225 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5226 # respective field is not requested
5227 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5228 for fname in ["hidden", "blacklisted"]
5229 if fname not in fields]
5230 if "valid" not in fields:
5231 status_filter.append([qlang.OP_TRUE, "valid"])
5234 status_filter.insert(0, qlang.OP_AND)
5236 status_filter = None
5238 if name_filter and status_filter:
5239 return [qlang.OP_AND, name_filter, status_filter]
5243 return status_filter
5245 def CheckArguments(self):
5246 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5247 self.op.output_fields, False)
5249 def ExpandNames(self):
5250 self.oq.ExpandNames(self)
5252 def Exec(self, feedback_fn):
5253 return self.oq.OldStyleQuery(self)
5256 class _ExtStorageQuery(_QueryBase):
5257 FIELDS = query.EXTSTORAGE_FIELDS
5259 def ExpandNames(self, lu):
5260 # Lock all nodes in shared mode
5261 # Temporary removal of locks, should be reverted later
5262 # TODO: reintroduce locks when they are lighter-weight
5263 lu.needed_locks = {}
5264 #self.share_locks[locking.LEVEL_NODE] = 1
5265 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5267 # The following variables interact with _QueryBase._GetNames
5269 self.wanted = self.names
5271 self.wanted = locking.ALL_SET
5273 self.do_locking = self.use_locking
5275 def DeclareLocks(self, lu, level):
5279 def _DiagnoseByProvider(rlist):
5280 """Remaps a per-node return list into an a per-provider per-node dictionary
5282 @param rlist: a map with node names as keys and ExtStorage objects as values
5285 @return: a dictionary with extstorage providers as keys and as
5286 value another map, with nodes as keys and tuples of
5287 (path, status, diagnose, parameters) as values, eg::
5289 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5290 "node2": [(/srv/..., False, "missing file")]
5291 "node3": [(/srv/..., True, "", [])]
5296 # we build here the list of nodes that didn't fail the RPC (at RPC
5297 # level), so that nodes with a non-responding node daemon don't
5298 # make all OSes invalid
5299 good_nodes = [node_name for node_name in rlist
5300 if not rlist[node_name].fail_msg]
5301 for node_name, nr in rlist.items():
5302 if nr.fail_msg or not nr.payload:
5304 for (name, path, status, diagnose, params) in nr.payload:
5305 if name not in all_es:
5306           # build a list of nodes for this provider containing empty lists
5307 # for each node in node_list
5309 for nname in good_nodes:
5310 all_es[name][nname] = []
5311 # convert params from [name, help] to (name, help)
5312 params = [tuple(v) for v in params]
5313 all_es[name][node_name].append((path, status, diagnose, params))
5316 def _GetQueryData(self, lu):
5317 """Computes the list of nodes and their attributes.
5320 # Locking is not used
5321 assert not (compat.any(lu.glm.is_owned(level)
5322 for level in locking.LEVELS
5323 if level != locking.LEVEL_CLUSTER) or
5324 self.do_locking or self.use_locking)
5326 valid_nodes = [node.name
5327 for node in lu.cfg.GetAllNodesInfo().values()
5328 if not node.offline and node.vm_capable]
5329 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5333 nodegroup_list = lu.cfg.GetNodeGroupList()
5335 for (es_name, es_data) in pol.items():
5336 # For every provider compute the nodegroup validity.
5337 # To do this we need to check the validity of each node in es_data
5338 # and then construct the corresponding nodegroup dict:
5339 # { nodegroup1: status
5340 # nodegroup2: status
5343 for nodegroup in nodegroup_list:
5344 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5346 nodegroup_nodes = ndgrp.members
5347 nodegroup_name = ndgrp.name
5350 for node in nodegroup_nodes:
5351 if node in valid_nodes:
5352 if es_data[node] != []:
5353 node_status = es_data[node][0][1]
5354 node_statuses.append(node_status)
5356 node_statuses.append(False)
5358 if False in node_statuses:
5359 ndgrp_data[nodegroup_name] = False
5361 ndgrp_data[nodegroup_name] = True
5363 # Compute the provider's parameters
5365 for idx, esl in enumerate(es_data.values()):
5366 valid = bool(esl and esl[0][1])
5370 node_params = esl[0][3]
5373 parameters.update(node_params)
5375 # Filter out inconsistent values
5376 parameters.intersection_update(node_params)
5378 params = list(parameters)
5380 # Now fill all the info for this provider
5381 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5382 nodegroup_status=ndgrp_data,
5385 data[es_name] = info
5387 # Prepare data in requested order
5388 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5392 class LUExtStorageDiagnose(NoHooksLU):
5393 """Logical unit for ExtStorage diagnose/query.
5398 def CheckArguments(self):
5399 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5400 self.op.output_fields, False)
5402 def ExpandNames(self):
5403 self.eq.ExpandNames(self)
5405 def Exec(self, feedback_fn):
5406 return self.eq.OldStyleQuery(self)
5409 class LUNodeRemove(LogicalUnit):
5410 """Logical unit for removing a node.
5413 HPATH = "node-remove"
5414 HTYPE = constants.HTYPE_NODE
5416 def BuildHooksEnv(self):
5421 "OP_TARGET": self.op.node_name,
5422 "NODE_NAME": self.op.node_name,
5425 def BuildHooksNodes(self):
5426 """Build hooks nodes.
5428 This doesn't run on the target node in the pre phase as a failed
5429 node would then be impossible to remove.
5432 all_nodes = self.cfg.GetNodeList()
5434 all_nodes.remove(self.op.node_name)
5437 return (all_nodes, all_nodes)
5439 def CheckPrereq(self):
5440 """Check prerequisites.
5443 - the node exists in the configuration
5444 - it does not have primary or secondary instances
5445 - it's not the master
5447 Any errors are signaled by raising errors.OpPrereqError.
5450 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5451 node = self.cfg.GetNodeInfo(self.op.node_name)
5452 assert node is not None
5454 masternode = self.cfg.GetMasterNode()
5455 if node.name == masternode:
5456 raise errors.OpPrereqError("Node is the master node, failover to another"
5457 " node is required", errors.ECODE_INVAL)
5459 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5460 if node.name in instance.all_nodes:
5461 raise errors.OpPrereqError("Instance %s is still running on the node,"
5462 " please remove first" % instance_name,
5464 self.op.node_name = node.name
5467 def Exec(self, feedback_fn):
5468 """Removes the node from the cluster.
5472 logging.info("Stopping the node daemon and removing configs from node %s",
5475 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5477 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5480 # Promote nodes to master candidate as needed
5481 _AdjustCandidatePool(self, exceptions=[node.name])
5482 self.context.RemoveNode(node.name)
5484 # Run post hooks on the node before it's removed
5485 _RunPostHook(self, node.name)
5487 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5488 msg = result.fail_msg
5490 self.LogWarning("Errors encountered on the remote node while leaving"
5491 " the cluster: %s", msg)
5493 # Remove node from our /etc/hosts
5494 if self.cfg.GetClusterInfo().modify_etc_hosts:
5495 master_node = self.cfg.GetMasterNode()
5496 result = self.rpc.call_etc_hosts_modify(master_node,
5497 constants.ETC_HOSTS_REMOVE,
5499 result.Raise("Can't update hosts file with new host data")
5500 _RedistributeAncillaryFiles(self)
5503 class _NodeQuery(_QueryBase):
5504 FIELDS = query.NODE_FIELDS
5506 def ExpandNames(self, lu):
5507 lu.needed_locks = {}
5508 lu.share_locks = _ShareAll()
5511 self.wanted = _GetWantedNodes(lu, self.names)
5513 self.wanted = locking.ALL_SET
5515 self.do_locking = (self.use_locking and
5516 query.NQ_LIVE in self.requested_data)
5519 # If any non-static field is requested we need to lock the nodes
5520 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5521 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5523 def DeclareLocks(self, lu, level):
5526 def _GetQueryData(self, lu):
5527 """Computes the list of nodes and their attributes.
5530 all_info = lu.cfg.GetAllNodesInfo()
5532 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5534 # Gather data as requested
5535 if query.NQ_LIVE in self.requested_data:
5536 # filter out non-vm_capable nodes
5537 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5539 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5540 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5541 [lu.cfg.GetHypervisorType()], es_flags)
5542 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5543 for (name, nresult) in node_data.items()
5544 if not nresult.fail_msg and nresult.payload)
5548 if query.NQ_INST in self.requested_data:
5549 node_to_primary = dict([(name, set()) for name in nodenames])
5550 node_to_secondary = dict([(name, set()) for name in nodenames])
5552 inst_data = lu.cfg.GetAllInstancesInfo()
5554 for inst in inst_data.values():
5555 if inst.primary_node in node_to_primary:
5556 node_to_primary[inst.primary_node].add(inst.name)
5557 for secnode in inst.secondary_nodes:
5558 if secnode in node_to_secondary:
5559 node_to_secondary[secnode].add(inst.name)
5561 node_to_primary = None
5562 node_to_secondary = None
5564 if query.NQ_OOB in self.requested_data:
5565 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5566 for name, node in all_info.iteritems())
5570 if query.NQ_GROUP in self.requested_data:
5571 groups = lu.cfg.GetAllNodeGroupsInfo()
5575 return query.NodeQueryData([all_info[name] for name in nodenames],
5576 live_data, lu.cfg.GetMasterNode(),
5577 node_to_primary, node_to_secondary, groups,
5578 oob_support, lu.cfg.GetClusterInfo())
5581 class LUNodeQuery(NoHooksLU):
5582 """Logical unit for querying nodes.
5585 # pylint: disable=W0142
5588 def CheckArguments(self):
5589 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5590 self.op.output_fields, self.op.use_locking)
5592 def ExpandNames(self):
5593 self.nq.ExpandNames(self)
5595 def DeclareLocks(self, level):
5596 self.nq.DeclareLocks(self, level)
5598 def Exec(self, feedback_fn):
5599 return self.nq.OldStyleQuery(self)
5602 class LUNodeQueryvols(NoHooksLU):
5603 """Logical unit for getting volumes on node(s).
5607 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5608 _FIELDS_STATIC = utils.FieldSet("node")
5610 def CheckArguments(self):
5611 _CheckOutputFields(static=self._FIELDS_STATIC,
5612 dynamic=self._FIELDS_DYNAMIC,
5613 selected=self.op.output_fields)
5615 def ExpandNames(self):
5616 self.share_locks = _ShareAll()
5618     if self.op.nodes:
5619       self.needed_locks = {
5620         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5621         }
5622     else:
5623       self.needed_locks = {
5624         locking.LEVEL_NODE: locking.ALL_SET,
5625         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5626         }
5628 def Exec(self, feedback_fn):
5629     """Computes the list of volumes on the selected nodes.
5632 nodenames = self.owned_locks(locking.LEVEL_NODE)
5633 volumes = self.rpc.call_node_volumes(nodenames)
5635 ilist = self.cfg.GetAllInstancesInfo()
5636 vol2inst = _MapInstanceDisksToNodes(ilist.values())
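    # vol2inst maps (node name, "<vg>/<lv>") pairs to the owning instance
    # name; it is used below to fill in the "instance" output field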
5639 for node in nodenames:
5640 nresult = volumes[node]
5643 msg = nresult.fail_msg
5645 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5648 node_vols = sorted(nresult.payload,
5649 key=operator.itemgetter("dev"))
5651 for vol in node_vols:
5653 for field in self.op.output_fields:
5656 elif field == "phys":
5660 elif field == "name":
5662 elif field == "size":
5663 val = int(float(vol["size"]))
5664 elif field == "instance":
5665 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5667 raise errors.ParameterError(field)
5668 node_output.append(str(val))
5670 output.append(node_output)
5675 class LUNodeQueryStorage(NoHooksLU):
5676 """Logical unit for getting information on storage units on node(s).
5679 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5682 def CheckArguments(self):
5683 _CheckOutputFields(static=self._FIELDS_STATIC,
5684 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5685 selected=self.op.output_fields)
5687 def ExpandNames(self):
5688 self.share_locks = _ShareAll()
5690     if self.op.nodes:
5691       self.needed_locks = {
5692         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5693         }
5694     else:
5695       self.needed_locks = {
5696         locking.LEVEL_NODE: locking.ALL_SET,
5697         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5698         }
5700 def Exec(self, feedback_fn):
5701     """Computes the list of storage units on the selected nodes.
5704 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5706 # Always get name to sort by
5707 if constants.SF_NAME in self.op.output_fields:
5708       fields = self.op.output_fields[:]
5709     else:
5710       fields = [constants.SF_NAME] + self.op.output_fields
5712 # Never ask for node or type as it's only known to the LU
5713 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5714 while extra in fields:
5715 fields.remove(extra)
5717 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5718 name_idx = field_idx[constants.SF_NAME]
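    # each row returned by the storage_list RPC carries its values in the same
    # order as `fields`, so field_idx gives the column index for every field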
5720 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5721 data = self.rpc.call_storage_list(self.nodes,
5722 self.op.storage_type, st_args,
5723 self.op.name, fields)
5727 for node in utils.NiceSort(self.nodes):
5728 nresult = data[node]
5732 msg = nresult.fail_msg
5734 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5737 rows = dict([(row[name_idx], row) for row in nresult.payload])
5739 for name in utils.NiceSort(rows.keys()):
5744 for field in self.op.output_fields:
5745 if field == constants.SF_NODE:
5747 elif field == constants.SF_TYPE:
5748 val = self.op.storage_type
5749 elif field in field_idx:
5750 val = row[field_idx[field]]
5752 raise errors.ParameterError(field)
5761 class _InstanceQuery(_QueryBase):
5762 FIELDS = query.INSTANCE_FIELDS
5764 def ExpandNames(self, lu):
5765 lu.needed_locks = {}
5766 lu.share_locks = _ShareAll()
5768     if self.names:
5769       self.wanted = _GetWantedInstances(lu, self.names)
5770     else:
5771       self.wanted = locking.ALL_SET
5773 self.do_locking = (self.use_locking and
5774 query.IQ_LIVE in self.requested_data)
5776 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5777 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5778 lu.needed_locks[locking.LEVEL_NODE] = []
5779 lu.needed_locks[locking.LEVEL_NETWORK] = []
5780 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5782 self.do_grouplocks = (self.do_locking and
5783 query.IQ_NODES in self.requested_data)
5785 def DeclareLocks(self, lu, level):
5787 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5788 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5790 # Lock all groups used by instances optimistically; this requires going
5791 # via the node before it's locked, requiring verification later on
5792 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5794 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5795 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5796 elif level == locking.LEVEL_NODE:
5797 lu._LockInstancesNodes() # pylint: disable=W0212
5799 elif level == locking.LEVEL_NETWORK:
5800 lu.needed_locks[locking.LEVEL_NETWORK] = \
5802 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5803 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5806 def _CheckGroupLocks(lu):
5807 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5808 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5810 # Check if node groups for locked instances are still correct
5811 for instance_name in owned_instances:
5812 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5814 def _GetQueryData(self, lu):
5815 """Computes the list of instances and their attributes.
5818 if self.do_grouplocks:
5819 self._CheckGroupLocks(lu)
5821 cluster = lu.cfg.GetClusterInfo()
5822 all_info = lu.cfg.GetAllInstancesInfo()
5824 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5826 instance_list = [all_info[name] for name in instance_names]
5827 nodes = frozenset(itertools.chain(*(inst.all_nodes
5828 for inst in instance_list)))
5829 hv_list = list(set([inst.hypervisor for inst in instance_list]))
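    # the nodes and hypervisors used by the selected instances bound the RPC
    # fan-out of the live-data queries below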
5832 wrongnode_inst = set()
5834 # Gather data as requested
5835 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5837 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5838       for name in nodes:
5839         result = node_data[name]
5840         if result.offline:
5841           # offline nodes will be in both lists
5842           assert result.fail_msg
5843           offline_nodes.append(name)
5844         if result.fail_msg:
5845           bad_nodes.append(name)
5846 elif result.payload:
5847 for inst in result.payload:
5848 if inst in all_info:
5849 if all_info[inst].primary_node == name:
5850 live_data.update(result.payload)
5852 wrongnode_inst.add(inst)
5854 # orphan instance; we don't list it here as we don't
5855 # handle this case yet in the output of instance listing
5856 logging.warning("Orphan instance '%s' found on node %s",
5858 # else no instance is alive
5862 if query.IQ_DISKUSAGE in self.requested_data:
5863 gmi = ganeti.masterd.instance
5864 disk_usage = dict((inst.name,
5865 gmi.ComputeDiskSize(inst.disk_template,
5866 [{constants.IDISK_SIZE: disk.size}
5867 for disk in inst.disks]))
5868 for inst in instance_list)
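      # disk_usage: instance name -> disk space (in MiB) consumed by its disks
      # under the instance's disk template, including any template overhead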
5872 if query.IQ_CONSOLE in self.requested_data:
5874 for inst in instance_list:
5875 if inst.name in live_data:
5876 # Instance is running
5877 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5878         else:
5879           consinfo[inst.name] = None
5880 assert set(consinfo.keys()) == set(instance_names)
5884 if query.IQ_NODES in self.requested_data:
5885 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5887 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5888 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5889 for uuid in set(map(operator.attrgetter("group"),
5895 if query.IQ_NETWORKS in self.requested_data:
5896 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5897 for i in instance_list))
5898 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5902 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5903 disk_usage, offline_nodes, bad_nodes,
5904 live_data, wrongnode_inst, consinfo,
5905 nodes, groups, networks)
5908 class LUQuery(NoHooksLU):
5909 """Query for resources/items of a certain kind.
5912 # pylint: disable=W0142
5915 def CheckArguments(self):
5916 qcls = _GetQueryImplementation(self.op.what)
5918 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5920 def ExpandNames(self):
5921 self.impl.ExpandNames(self)
5923 def DeclareLocks(self, level):
5924 self.impl.DeclareLocks(self, level)
5926 def Exec(self, feedback_fn):
5927 return self.impl.NewStyleQuery(self)
5930 class LUQueryFields(NoHooksLU):
5931 """Query for resources/items of a certain kind.
5934 # pylint: disable=W0142
5937 def CheckArguments(self):
5938 self.qcls = _GetQueryImplementation(self.op.what)
5940 def ExpandNames(self):
5941 self.needed_locks = {}
5943 def Exec(self, feedback_fn):
5944 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5947 class LUNodeModifyStorage(NoHooksLU):
5948 """Logical unit for modifying a storage volume on a node.
5953 def CheckArguments(self):
5954 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5956 storage_type = self.op.storage_type
5958     try:
5959       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5960     except KeyError:
5961       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5962                                  " modified" % storage_type,
5965     diff = set(self.op.changes.keys()) - modifiable
5966     if diff:
5967       raise errors.OpPrereqError("The following fields can not be modified for"
5968 " storage units of type '%s': %r" %
5969 (storage_type, list(diff)),
5972 def ExpandNames(self):
5973 self.needed_locks = {
5974 locking.LEVEL_NODE: self.op.node_name,
5977 def Exec(self, feedback_fn):
5978     """Modifies the storage unit on the selected node.
5981 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5982 result = self.rpc.call_storage_modify(self.op.node_name,
5983 self.op.storage_type, st_args,
5984 self.op.name, self.op.changes)
5985 result.Raise("Failed to modify storage unit '%s' on %s" %
5986 (self.op.name, self.op.node_name))
5989 class LUNodeAdd(LogicalUnit):
5990 """Logical unit for adding node to the cluster.
5994 HTYPE = constants.HTYPE_NODE
5995 _NFLAGS = ["master_capable", "vm_capable"]
5997 def CheckArguments(self):
5998 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5999 # validate/normalize the node name
6000 self.hostname = netutils.GetHostname(name=self.op.node_name,
6001 family=self.primary_ip_family)
6002 self.op.node_name = self.hostname.name
6004 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
6005 raise errors.OpPrereqError("Cannot readd the master node",
6008 if self.op.readd and self.op.group:
6009 raise errors.OpPrereqError("Cannot pass a node group when a node is"
6010 " being readded", errors.ECODE_INVAL)
6012 def BuildHooksEnv(self):
6015 This will run on all nodes before, and on all nodes + the new node after.
6019 "OP_TARGET": self.op.node_name,
6020 "NODE_NAME": self.op.node_name,
6021 "NODE_PIP": self.op.primary_ip,
6022 "NODE_SIP": self.op.secondary_ip,
6023 "MASTER_CAPABLE": str(self.op.master_capable),
6024 "VM_CAPABLE": str(self.op.vm_capable),
6027 def BuildHooksNodes(self):
6028 """Build hooks nodes.
6031 # Exclude added node
6032 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6033 post_nodes = pre_nodes + [self.op.node_name, ]
6035 return (pre_nodes, post_nodes)
6037 def CheckPrereq(self):
6038 """Check prerequisites.
6041 - the new node is not already in the config
6043      - its parameters (single/dual homed) match the cluster
6045 Any errors are signaled by raising errors.OpPrereqError.
6049 hostname = self.hostname
6050 node = hostname.name
6051 primary_ip = self.op.primary_ip = hostname.ip
6052 if self.op.secondary_ip is None:
6053 if self.primary_ip_family == netutils.IP6Address.family:
6054         raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
6055 " IPv4 address must be given as secondary",
6057 self.op.secondary_ip = primary_ip
6059 secondary_ip = self.op.secondary_ip
6060 if not netutils.IP4Address.IsValid(secondary_ip):
6061 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6062 " address" % secondary_ip, errors.ECODE_INVAL)
6064 node_list = cfg.GetNodeList()
6065 if not self.op.readd and node in node_list:
6066 raise errors.OpPrereqError("Node %s is already in the configuration" %
6067 node, errors.ECODE_EXISTS)
6068 elif self.op.readd and node not in node_list:
6069 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6072 self.changed_primary_ip = False
6074 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6075 if self.op.readd and node == existing_node_name:
6076 if existing_node.secondary_ip != secondary_ip:
6077 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6078 " address configuration as before",
6080 if existing_node.primary_ip != primary_ip:
6081 self.changed_primary_ip = True
6085 if (existing_node.primary_ip == primary_ip or
6086 existing_node.secondary_ip == primary_ip or
6087 existing_node.primary_ip == secondary_ip or
6088 existing_node.secondary_ip == secondary_ip):
6089 raise errors.OpPrereqError("New node ip address(es) conflict with"
6090 " existing node %s" % existing_node.name,
6091 errors.ECODE_NOTUNIQUE)
6093 # After this 'if' block, None is no longer a valid value for the
6094 # _capable op attributes
6096 old_node = self.cfg.GetNodeInfo(node)
6097 assert old_node is not None, "Can't retrieve locked node %s" % node
6098 for attr in self._NFLAGS:
6099 if getattr(self.op, attr) is None:
6100 setattr(self.op, attr, getattr(old_node, attr))
6102 for attr in self._NFLAGS:
6103 if getattr(self.op, attr) is None:
6104 setattr(self.op, attr, True)
6106 if self.op.readd and not self.op.vm_capable:
6107 pri, sec = cfg.GetNodeInstances(node)
6109 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6110 " flag set to false, but it already holds"
6111 " instances" % node,
6114 # check that the type of the node (single versus dual homed) is the
6115 # same as for the master
6116 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6117 master_singlehomed = myself.secondary_ip == myself.primary_ip
6118 newbie_singlehomed = secondary_ip == primary_ip
6119 if master_singlehomed != newbie_singlehomed:
6120 if master_singlehomed:
6121 raise errors.OpPrereqError("The master has no secondary ip but the"
6122 " new node has one",
6125 raise errors.OpPrereqError("The master has a secondary ip but the"
6126 " new node doesn't have one",
6129 # checks reachability
6130 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6131 raise errors.OpPrereqError("Node not reachable by ping",
6132 errors.ECODE_ENVIRON)
6134 if not newbie_singlehomed:
6135 # check reachability from my secondary ip to newbie's secondary ip
6136 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6137 source=myself.secondary_ip):
6138 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6139 " based ping to node daemon port",
6140 errors.ECODE_ENVIRON)
6147 if self.op.master_capable:
6148       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6149     else:
6150       self.master_candidate = False
6152     if self.op.readd:
6153       self.new_node = old_node
6154     else:
6155       node_group = cfg.LookupNodeGroup(self.op.group)
6156 self.new_node = objects.Node(name=node,
6157 primary_ip=primary_ip,
6158 secondary_ip=secondary_ip,
6159 master_candidate=self.master_candidate,
6160 offline=False, drained=False,
6161 group=node_group, ndparams={})
6163 if self.op.ndparams:
6164 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6165 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6166 "node", "cluster or group")
6168 if self.op.hv_state:
6169 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6171 if self.op.disk_state:
6172 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6174 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6175 # it a property on the base class.
6176 rpcrunner = rpc.DnsOnlyRunner()
6177 result = rpcrunner.call_version([node])[node]
6178 result.Raise("Can't get version information from node %s" % node)
6179 if constants.PROTOCOL_VERSION == result.payload:
6180 logging.info("Communication to node %s fine, sw version %s match",
6181 node, result.payload)
6183 raise errors.OpPrereqError("Version mismatch master version %s,"
6184 " node version %s" %
6185 (constants.PROTOCOL_VERSION, result.payload),
6186 errors.ECODE_ENVIRON)
6188 vg_name = cfg.GetVGName()
6189 if vg_name is not None:
6190 vparams = {constants.NV_PVLIST: [vg_name]}
6191 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6192 cname = self.cfg.GetClusterName()
6193 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6194 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6196 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6197 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6199 def Exec(self, feedback_fn):
6200 """Adds the new node to the cluster.
6203 new_node = self.new_node
6204 node = new_node.name
6206 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6209     # We're adding a new node, so we assume it's powered
6210 new_node.powered = True
6212 # for re-adds, reset the offline/drained/master-candidate flags;
6213 # we need to reset here, otherwise offline would prevent RPC calls
6214 # later in the procedure; this also means that if the re-add
6215 # fails, we are left with a non-offlined, broken node
6216     if self.op.readd:
6217       new_node.drained = new_node.offline = False  # pylint: disable=W0201
6218       self.LogInfo("Readding a node, the offline/drained flags were reset")
6219       # if we demote the node, we do cleanup later in the procedure
6220       new_node.master_candidate = self.master_candidate
6221       if self.changed_primary_ip:
6222         new_node.primary_ip = self.op.primary_ip
6224 # copy the master/vm_capable flags
6225 for attr in self._NFLAGS:
6226 setattr(new_node, attr, getattr(self.op, attr))
6228 # notify the user about any possible mc promotion
6229 if new_node.master_candidate:
6230 self.LogInfo("Node will be a master candidate")
6232 if self.op.ndparams:
6233 new_node.ndparams = self.op.ndparams
6235 new_node.ndparams = {}
6237 if self.op.hv_state:
6238 new_node.hv_state_static = self.new_hv_state
6240 if self.op.disk_state:
6241 new_node.disk_state_static = self.new_disk_state
6243 # Add node to our /etc/hosts, and add key to known_hosts
6244 if self.cfg.GetClusterInfo().modify_etc_hosts:
6245 master_node = self.cfg.GetMasterNode()
6246 result = self.rpc.call_etc_hosts_modify(master_node,
6247 constants.ETC_HOSTS_ADD,
6250 result.Raise("Can't update hosts file with new host data")
6252 if new_node.secondary_ip != new_node.primary_ip:
6253 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6256 node_verify_list = [self.cfg.GetMasterNode()]
6257 node_verify_param = {
6258 constants.NV_NODELIST: ([node], {}),
6259 # TODO: do a node-net-test as well?
6262 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6263 self.cfg.GetClusterName())
6264 for verifier in node_verify_list:
6265 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6266 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6268 for failed in nl_payload:
6269 feedback_fn("ssh/hostname verification failed"
6270 " (checking from %s): %s" %
6271 (verifier, nl_payload[failed]))
6272 raise errors.OpExecError("ssh/hostname verification failed")
6274     if self.op.readd:
6275       _RedistributeAncillaryFiles(self)
6276       self.context.ReaddNode(new_node)
6277       # make sure we redistribute the config
6278       self.cfg.Update(new_node, feedback_fn)
6279       # and make sure the new node will not have old files around
6280       if not new_node.master_candidate:
6281         result = self.rpc.call_node_demote_from_mc(new_node.name)
6282         msg = result.fail_msg
6283         if msg:
6284           self.LogWarning("Node failed to demote itself from master"
6285                           " candidate status: %s" % msg)
6286     else:
6287       _RedistributeAncillaryFiles(self, additional_nodes=[node],
6288                                   additional_vm=self.op.vm_capable)
6289       self.context.AddNode(new_node, self.proc.GetECId())
6292 class LUNodeSetParams(LogicalUnit):
6293 """Modifies the parameters of a node.
6295 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6296 to the node role (as _ROLE_*)
6297 @cvar _R2F: a dictionary from node role to tuples of flags
6298 @cvar _FLAGS: a list of attribute names corresponding to the flags
6301 HPATH = "node-modify"
6302 HTYPE = constants.HTYPE_NODE
6304 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6306 (True, False, False): _ROLE_CANDIDATE,
6307 (False, True, False): _ROLE_DRAINED,
6308 (False, False, True): _ROLE_OFFLINE,
6309 (False, False, False): _ROLE_REGULAR,
6311 _R2F = dict((v, k) for k, v in _F2R.items())
6312 _FLAGS = ["master_candidate", "drained", "offline"]
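  # Example: the flag tuple (master_candidate=True, drained=False,
  # offline=False) maps to _ROLE_CANDIDATE via _F2R; _R2F is the inverse and
  # is used in Exec() to turn the computed role back into the three flags.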
6314 def CheckArguments(self):
6315 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6316 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6317 self.op.master_capable, self.op.vm_capable,
6318 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6320 if all_mods.count(None) == len(all_mods):
6321 raise errors.OpPrereqError("Please pass at least one modification",
6323 if all_mods.count(True) > 1:
6324 raise errors.OpPrereqError("Can't set the node into more than one"
6325 " state at the same time",
6328 # Boolean value that tells us whether we might be demoting from MC
6329 self.might_demote = (self.op.master_candidate is False or
6330 self.op.offline is True or
6331 self.op.drained is True or
6332 self.op.master_capable is False)
6334 if self.op.secondary_ip:
6335 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6336 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6337 " address" % self.op.secondary_ip,
6340 self.lock_all = self.op.auto_promote and self.might_demote
6341 self.lock_instances = self.op.secondary_ip is not None
6343 def _InstanceFilter(self, instance):
6344 """Filter for getting affected instances.
6347 return (instance.disk_template in constants.DTS_INT_MIRROR and
6348 self.op.node_name in instance.all_nodes)
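    # (only internally mirrored, i.e. DRBD, instances that use this node are
    # affected by changing its secondary IP address)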
6350 def ExpandNames(self):
6351     if self.lock_all:
6352       self.needed_locks = {
6353         locking.LEVEL_NODE: locking.ALL_SET,
6355         # Block allocations when all nodes are locked
6356         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6357         }
6358     else:
6359       self.needed_locks = {
6360         locking.LEVEL_NODE: self.op.node_name,
6361         }
6363 # Since modifying a node can have severe effects on currently running
6364 # operations the resource lock is at least acquired in shared mode
6365 self.needed_locks[locking.LEVEL_NODE_RES] = \
6366 self.needed_locks[locking.LEVEL_NODE]
6368 # Get all locks except nodes in shared mode; they are not used for anything
6369 # but read-only access
6370 self.share_locks = _ShareAll()
6371 self.share_locks[locking.LEVEL_NODE] = 0
6372 self.share_locks[locking.LEVEL_NODE_RES] = 0
6373 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6375 if self.lock_instances:
6376 self.needed_locks[locking.LEVEL_INSTANCE] = \
6377 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6379 def BuildHooksEnv(self):
6382 This runs on the master node.
6386 "OP_TARGET": self.op.node_name,
6387 "MASTER_CANDIDATE": str(self.op.master_candidate),
6388 "OFFLINE": str(self.op.offline),
6389 "DRAINED": str(self.op.drained),
6390 "MASTER_CAPABLE": str(self.op.master_capable),
6391 "VM_CAPABLE": str(self.op.vm_capable),
6394 def BuildHooksNodes(self):
6395 """Build hooks nodes.
6398 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6401 def CheckPrereq(self):
6402 """Check prerequisites.
6404 This only checks the instance list against the existing names.
6407 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6409 if self.lock_instances:
6410 affected_instances = \
6411 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6413 # Verify instance locks
6414 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6415 wanted_instances = frozenset(affected_instances.keys())
6416 if wanted_instances - owned_instances:
6417 raise errors.OpPrereqError("Instances affected by changing node %s's"
6418 " secondary IP address have changed since"
6419 " locks were acquired, wanted '%s', have"
6420 " '%s'; retry the operation" %
6422 utils.CommaJoin(wanted_instances),
6423 utils.CommaJoin(owned_instances)),
6426 affected_instances = None
6428 if (self.op.master_candidate is not None or
6429 self.op.drained is not None or
6430 self.op.offline is not None):
6431 # we can't change the master's node flags
6432 if self.op.node_name == self.cfg.GetMasterNode():
6433 raise errors.OpPrereqError("The master role can be changed"
6434 " only via master-failover",
6437 if self.op.master_candidate and not node.master_capable:
6438 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6439 " it a master candidate" % node.name,
6442 if self.op.vm_capable is False:
6443 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6445 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6446 " the vm_capable flag" % node.name,
6449 if node.master_candidate and self.might_demote and not self.lock_all:
6450 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6451 # check if after removing the current node, we're missing master
6453 (mc_remaining, mc_should, _) = \
6454 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6455 if mc_remaining < mc_should:
6456 raise errors.OpPrereqError("Not enough master candidates, please"
6457 " pass auto promote option to allow"
6458 " promotion (--auto-promote or RAPI"
6459 " auto_promote=True)", errors.ECODE_STATE)
6461 self.old_flags = old_flags = (node.master_candidate,
6462 node.drained, node.offline)
6463 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6464 self.old_role = old_role = self._F2R[old_flags]
6466 # Check for ineffective changes
6467 for attr in self._FLAGS:
6468 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6469 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6470 setattr(self.op, attr, None)
6472 # Past this point, any flag change to False means a transition
6473 # away from the respective state, as only real changes are kept
6475 # TODO: We might query the real power state if it supports OOB
6476 if _SupportsOob(self.cfg, node):
6477 if self.op.offline is False and not (node.powered or
6478 self.op.powered is True):
6479 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6480 " offline status can be reset") %
6481 self.op.node_name, errors.ECODE_STATE)
6482 elif self.op.powered is not None:
6483 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6484 " as it does not support out-of-band"
6485 " handling") % self.op.node_name,
6488 # If we're being deofflined/drained, we'll MC ourself if needed
6489 if (self.op.drained is False or self.op.offline is False or
6490 (self.op.master_capable and not node.master_capable)):
6491 if _DecideSelfPromotion(self):
6492 self.op.master_candidate = True
6493 self.LogInfo("Auto-promoting node to master candidate")
6495 # If we're no longer master capable, we'll demote ourselves from MC
6496 if self.op.master_capable is False and node.master_candidate:
6497 self.LogInfo("Demoting from master candidate")
6498 self.op.master_candidate = False
6501 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
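    # Determine the new role: an explicitly requested flag wins (at most one
    # may be True, per the assert above); a remaining False means we are
    # unsetting the current state and falling back to the regular role; with
    # no flag changes the old role is kept.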
6502 if self.op.master_candidate:
6503 new_role = self._ROLE_CANDIDATE
6504 elif self.op.drained:
6505 new_role = self._ROLE_DRAINED
6506 elif self.op.offline:
6507 new_role = self._ROLE_OFFLINE
6508 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6509       # False is still in new flags, which means we're un-setting (the
6510       # current) offline/drained/master-candidate status
6511 new_role = self._ROLE_REGULAR
6512 else: # no new flags, nothing, keep old role
6515 self.new_role = new_role
6517 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6518 # Trying to transition out of offline status
6519 result = self.rpc.call_version([node.name])[node.name]
6521 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6522 " to report its version: %s" %
6523 (node.name, result.fail_msg),
6526 self.LogWarning("Transitioning node from offline to online state"
6527 " without using re-add. Please make sure the node"
6530 # When changing the secondary ip, verify if this is a single-homed to
6531 # multi-homed transition or vice versa, and apply the relevant
6533 if self.op.secondary_ip:
6534 # Ok even without locking, because this can't be changed by any LU
6535 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6536 master_singlehomed = master.secondary_ip == master.primary_ip
6537 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6538 if self.op.force and node.name == master.name:
6539 self.LogWarning("Transitioning from single-homed to multi-homed"
6540 " cluster; all nodes will require a secondary IP"
6543 raise errors.OpPrereqError("Changing the secondary ip on a"
6544 " single-homed cluster requires the"
6545 " --force option to be passed, and the"
6546 " target node to be the master",
6548 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6549 if self.op.force and node.name == master.name:
6550 self.LogWarning("Transitioning from multi-homed to single-homed"
6551 " cluster; secondary IP addresses will have to be"
6554 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6555 " same as the primary IP on a multi-homed"
6556 " cluster, unless the --force option is"
6557 " passed, and the target node is the"
6558 " master", errors.ECODE_INVAL)
6560 assert not (frozenset(affected_instances) -
6561 self.owned_locks(locking.LEVEL_INSTANCE))
6564 if affected_instances:
6565 msg = ("Cannot change secondary IP address: offline node has"
6566 " instances (%s) configured to use it" %
6567 utils.CommaJoin(affected_instances.keys()))
6568 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6570 # On online nodes, check that no instances are running, and that
6571 # the node has the new ip and we can reach it.
6572 for instance in affected_instances.values():
6573 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6574 msg="cannot change secondary ip")
6576 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6577 if master.name != node.name:
6578 # check reachability from master secondary ip to new secondary ip
6579 if not netutils.TcpPing(self.op.secondary_ip,
6580 constants.DEFAULT_NODED_PORT,
6581 source=master.secondary_ip):
6582 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6583 " based ping to node daemon port",
6584 errors.ECODE_ENVIRON)
6586 if self.op.ndparams:
6587 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6588 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6589 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6590 "node", "cluster or group")
6591 self.new_ndparams = new_ndparams
6593 if self.op.hv_state:
6594 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6595 self.node.hv_state_static)
6597 if self.op.disk_state:
6598 self.new_disk_state = \
6599 _MergeAndVerifyDiskState(self.op.disk_state,
6600 self.node.disk_state_static)
6602 def Exec(self, feedback_fn):
6607 old_role = self.old_role
6608 new_role = self.new_role
6612 if self.op.ndparams:
6613 node.ndparams = self.new_ndparams
6615 if self.op.powered is not None:
6616 node.powered = self.op.powered
6618 if self.op.hv_state:
6619 node.hv_state_static = self.new_hv_state
6621 if self.op.disk_state:
6622 node.disk_state_static = self.new_disk_state
6624 for attr in ["master_capable", "vm_capable"]:
6625 val = getattr(self.op, attr)
6627 setattr(node, attr, val)
6628 result.append((attr, str(val)))
6630 if new_role != old_role:
6631 # Tell the node to demote itself, if no longer MC and not offline
6632 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6633 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6635 self.LogWarning("Node failed to demote itself: %s", msg)
6637 new_flags = self._R2F[new_role]
6638 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6640 result.append((desc, str(nf)))
6641 (node.master_candidate, node.drained, node.offline) = new_flags
6643       # if we locked all nodes, we adjust the CP before updating this node
6644       if self.lock_all:
6645         _AdjustCandidatePool(self, [node.name])
6647 if self.op.secondary_ip:
6648 node.secondary_ip = self.op.secondary_ip
6649 result.append(("secondary_ip", self.op.secondary_ip))
6651 # this will trigger configuration file update, if needed
6652 self.cfg.Update(node, feedback_fn)
6654     # this will trigger job queue propagation or cleanup if the mc
6655     # flag changed
6656 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6657 self.context.ReaddNode(node)
6662 class LUNodePowercycle(NoHooksLU):
6663 """Powercycles a node.
6668 def CheckArguments(self):
6669 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6670 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6671 raise errors.OpPrereqError("The node is the master and the force"
6672 " parameter was not set",
6675 def ExpandNames(self):
6676 """Locking for PowercycleNode.
6678 This is a last-resort option and shouldn't block on other
6679 jobs. Therefore, we grab no locks.
6682 self.needed_locks = {}
6684 def Exec(self, feedback_fn):
6688 result = self.rpc.call_node_powercycle(self.op.node_name,
6689 self.cfg.GetHypervisorType())
6690 result.Raise("Failed to schedule the reboot")
6691 return result.payload
6694 class LUClusterQuery(NoHooksLU):
6695 """Query cluster configuration.
6700 def ExpandNames(self):
6701 self.needed_locks = {}
6703 def Exec(self, feedback_fn):
6704 """Return cluster config.
6707 cluster = self.cfg.GetClusterInfo()
6710 # Filter just for enabled hypervisors
6711 for os_name, hv_dict in cluster.os_hvp.items():
6712 os_hvp[os_name] = {}
6713 for hv_name, hv_params in hv_dict.items():
6714 if hv_name in cluster.enabled_hypervisors:
6715 os_hvp[os_name][hv_name] = hv_params
6717 # Convert ip_family to ip_version
6718 primary_ip_version = constants.IP4_VERSION
6719 if cluster.primary_ip_family == netutils.IP6Address.family:
6720 primary_ip_version = constants.IP6_VERSION
6723 "software_version": constants.RELEASE_VERSION,
6724 "protocol_version": constants.PROTOCOL_VERSION,
6725 "config_version": constants.CONFIG_VERSION,
6726 "os_api_version": max(constants.OS_API_VERSIONS),
6727 "export_version": constants.EXPORT_VERSION,
6728 "architecture": runtime.GetArchInfo(),
6729 "name": cluster.cluster_name,
6730 "master": cluster.master_node,
6731 "default_hypervisor": cluster.primary_hypervisor,
6732 "enabled_hypervisors": cluster.enabled_hypervisors,
6733 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6734 for hypervisor_name in cluster.enabled_hypervisors]),
6736 "beparams": cluster.beparams,
6737 "osparams": cluster.osparams,
6738 "ipolicy": cluster.ipolicy,
6739 "nicparams": cluster.nicparams,
6740 "ndparams": cluster.ndparams,
6741 "diskparams": cluster.diskparams,
6742 "candidate_pool_size": cluster.candidate_pool_size,
6743 "master_netdev": cluster.master_netdev,
6744 "master_netmask": cluster.master_netmask,
6745 "use_external_mip_script": cluster.use_external_mip_script,
6746 "volume_group_name": cluster.volume_group_name,
6747 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6748 "file_storage_dir": cluster.file_storage_dir,
6749 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6750 "maintain_node_health": cluster.maintain_node_health,
6751 "ctime": cluster.ctime,
6752 "mtime": cluster.mtime,
6753 "uuid": cluster.uuid,
6754 "tags": list(cluster.GetTags()),
6755 "uid_pool": cluster.uid_pool,
6756 "default_iallocator": cluster.default_iallocator,
6757 "reserved_lvs": cluster.reserved_lvs,
6758 "primary_ip_version": primary_ip_version,
6759 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6760 "hidden_os": cluster.hidden_os,
6761 "blacklisted_os": cluster.blacklisted_os,
6767 class LUClusterConfigQuery(NoHooksLU):
6768 """Return configuration values.
6773 def CheckArguments(self):
6774 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6776 def ExpandNames(self):
6777 self.cq.ExpandNames(self)
6779 def DeclareLocks(self, level):
6780 self.cq.DeclareLocks(self, level)
6782 def Exec(self, feedback_fn):
6783 result = self.cq.OldStyleQuery(self)
6785 assert len(result) == 1
6790 class _ClusterQuery(_QueryBase):
6791 FIELDS = query.CLUSTER_FIELDS
6793 #: Do not sort (there is only one item)
6796 def ExpandNames(self, lu):
6797 lu.needed_locks = {}
6799 # The following variables interact with _QueryBase._GetNames
6800 self.wanted = locking.ALL_SET
6801 self.do_locking = self.use_locking
6804 raise errors.OpPrereqError("Can not use locking for cluster queries",
6807 def DeclareLocks(self, lu, level):
6810 def _GetQueryData(self, lu):
6811     """Computes the cluster data.
6814 # Locking is not used
6815 assert not (compat.any(lu.glm.is_owned(level)
6816 for level in locking.LEVELS
6817 if level != locking.LEVEL_CLUSTER) or
6818 self.do_locking or self.use_locking)
6820 if query.CQ_CONFIG in self.requested_data:
6821 cluster = lu.cfg.GetClusterInfo()
6823 cluster = NotImplemented
6825 if query.CQ_QUEUE_DRAINED in self.requested_data:
6826 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6828 drain_flag = NotImplemented
6830 if query.CQ_WATCHER_PAUSE in self.requested_data:
6831 master_name = lu.cfg.GetMasterNode()
6833 result = lu.rpc.call_get_watcher_pause(master_name)
6834 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6837 watcher_pause = result.payload
6839 watcher_pause = NotImplemented
6841 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6844 class LUInstanceActivateDisks(NoHooksLU):
6845 """Bring up an instance's disks.
6850 def ExpandNames(self):
6851 self._ExpandAndLockInstance()
6852 self.needed_locks[locking.LEVEL_NODE] = []
6853 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6855 def DeclareLocks(self, level):
6856 if level == locking.LEVEL_NODE:
6857 self._LockInstancesNodes()
6859 def CheckPrereq(self):
6860 """Check prerequisites.
6862 This checks that the instance is in the cluster.
6865 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6866 assert self.instance is not None, \
6867 "Cannot retrieve locked instance %s" % self.op.instance_name
6868 _CheckNodeOnline(self, self.instance.primary_node)
6870 def Exec(self, feedback_fn):
6871 """Activate the disks.
6874 disks_ok, disks_info = \
6875 _AssembleInstanceDisks(self, self.instance,
6876 ignore_size=self.op.ignore_size)
6878 raise errors.OpExecError("Cannot activate block devices")
6880 if self.op.wait_for_sync:
6881 if not _WaitForSync(self, self.instance):
6882 raise errors.OpExecError("Some disks of the instance are degraded!")
6887 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6889 """Prepare the block devices for an instance.
6891 This sets up the block devices on all nodes.
6893 @type lu: L{LogicalUnit}
6894 @param lu: the logical unit on whose behalf we execute
6895 @type instance: L{objects.Instance}
6896 @param instance: the instance for whose disks we assemble
6897 @type disks: list of L{objects.Disk} or None
6898 @param disks: which disks to assemble (or all, if None)
6899 @type ignore_secondaries: boolean
6900 @param ignore_secondaries: if true, errors on secondary nodes
6901 won't result in an error return from the function
6902 @type ignore_size: boolean
6903 @param ignore_size: if true, the current known size of the disk
6904 will not be used during the disk activation, useful for cases
6905 when the size is wrong
6906 @return: False if the operation failed, otherwise a list of
6907 (host, instance_visible_name, node_visible_name)
6908 with the mapping from node devices to instance devices
6913 iname = instance.name
6914 disks = _ExpandCheckDisks(instance, disks)
6916   # With the two-pass mechanism we try to reduce the window of
6917   # opportunity for the race condition of switching DRBD to primary
6918   # before handshaking has occurred, but we do not eliminate it
6920 # The proper fix would be to wait (with some limits) until the
6921 # connection has been made and drbd transitions from WFConnection
6922   # into any other network-connected state (Connected, SyncTarget,
6923   # SyncSource, etc.)
6925 # 1st pass, assemble on all nodes in secondary mode
6926 for idx, inst_disk in enumerate(disks):
6927 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6929 node_disk = node_disk.Copy()
6930 node_disk.UnsetSize()
6931 lu.cfg.SetDiskID(node_disk, node)
6932 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6934 msg = result.fail_msg
6936 is_offline_secondary = (node in instance.secondary_nodes and
6938 lu.LogWarning("Could not prepare block device %s on node %s"
6939 " (is_primary=False, pass=1): %s",
6940 inst_disk.iv_name, node, msg)
6941 if not (ignore_secondaries or is_offline_secondary):
6944 # FIXME: race condition on drbd migration to primary
6946 # 2nd pass, do only the primary node
6947 for idx, inst_disk in enumerate(disks):
6950 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6951 if node != instance.primary_node:
6954 node_disk = node_disk.Copy()
6955 node_disk.UnsetSize()
6956 lu.cfg.SetDiskID(node_disk, node)
6957 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6959 msg = result.fail_msg
6961 lu.LogWarning("Could not prepare block device %s on node %s"
6962 " (is_primary=True, pass=2): %s",
6963 inst_disk.iv_name, node, msg)
6966 dev_path = result.payload
6968 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6970 # leave the disks configured for the primary node
6971 # this is a workaround that would be fixed better by
6972 # improving the logical/physical id handling
6974 lu.cfg.SetDiskID(disk, instance.primary_node)
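  # each device_info entry is a (primary node, instance-visible disk name,
  # node device path) tuple, matching the mapping described in the docstring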
6976 return disks_ok, device_info
6979 def _StartInstanceDisks(lu, instance, force):
6980 """Start the disks of an instance.
6983 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6984 ignore_secondaries=force)
6986 _ShutdownInstanceDisks(lu, instance)
6987 if force is not None and not force:
6989 hint=("If the message above refers to a secondary node,"
6990 " you can retry the operation using '--force'"))
6991 raise errors.OpExecError("Disk consistency error")
6994 class LUInstanceDeactivateDisks(NoHooksLU):
6995 """Shutdown an instance's disks.
7000 def ExpandNames(self):
7001 self._ExpandAndLockInstance()
7002 self.needed_locks[locking.LEVEL_NODE] = []
7003 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7005 def DeclareLocks(self, level):
7006 if level == locking.LEVEL_NODE:
7007 self._LockInstancesNodes()
7009 def CheckPrereq(self):
7010 """Check prerequisites.
7012 This checks that the instance is in the cluster.
7015 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7016 assert self.instance is not None, \
7017 "Cannot retrieve locked instance %s" % self.op.instance_name
7019 def Exec(self, feedback_fn):
7020 """Deactivate the disks
7023 instance = self.instance
7025 _ShutdownInstanceDisks(self, instance)
7027 _SafeShutdownInstanceDisks(self, instance)
7030 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7031 """Shutdown block devices of an instance.
7033 This function checks if an instance is running, before calling
7034 _ShutdownInstanceDisks.
7037 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7038 _ShutdownInstanceDisks(lu, instance, disks=disks)
7041 def _ExpandCheckDisks(instance, disks):
7042 """Return the instance disks selected by the disks list
7044 @type disks: list of L{objects.Disk} or None
7045 @param disks: selected disks
7046 @rtype: list of L{objects.Disk}
7047 @return: selected instance disks to act on
7051 return instance.disks
7053 if not set(disks).issubset(instance.disks):
7054 raise errors.ProgrammerError("Can only act on disks belonging to the"
7059 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7060 """Shutdown block devices of an instance.
7062 This does the shutdown on all nodes of the instance.
7064   If ignore_primary is false, errors on the primary node are
7065   ignored.
7069 disks = _ExpandCheckDisks(instance, disks)
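  # walk each disk's node tree and ask every node to tear the device down;
  # failures on offline secondaries are tolerated, while failures on the
  # primary (unless ignore_primary) or on online secondaries count as a
  # failed shutdown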
7072 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7073 lu.cfg.SetDiskID(top_disk, node)
7074 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7075 msg = result.fail_msg
7077 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7078 disk.iv_name, node, msg)
7079 if ((node == instance.primary_node and not ignore_primary) or
7080 (node != instance.primary_node and not result.offline)):
7085 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7086 """Checks if a node has enough free memory.
7088 This function checks if a given node has the needed amount of free
7089 memory. In case the node has less memory or we cannot get the
7090 information from the node, this function raises an OpPrereqError
7093 @type lu: C{LogicalUnit}
7094 @param lu: a logical unit from which we get configuration data
7096 @param node: the node to check
7097 @type reason: C{str}
7098 @param reason: string to use in the error message
7099 @type requested: C{int}
7100 @param requested: the amount of memory in MiB to check for
7101 @type hypervisor_name: C{str}
7102 @param hypervisor_name: the hypervisor to ask for memory stats
7104 @return: node current free memory
7105 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7106 we cannot check the node
7109 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7110 nodeinfo[node].Raise("Can't get data from node %s" % node,
7111 prereq=True, ecode=errors.ECODE_ENVIRON)
7112 (_, _, (hv_info, )) = nodeinfo[node].payload
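  # the node_info payload is a 3-tuple whose third element is the list of
  # per-hypervisor result dicts (exactly one here); the free-memory figure is
  # read from that dict below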
7114 free_mem = hv_info.get("memory_free", None)
7115 if not isinstance(free_mem, int):
7116 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7117 " was '%s'" % (node, free_mem),
7118 errors.ECODE_ENVIRON)
7119 if requested > free_mem:
7120 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7121 " needed %s MiB, available %s MiB" %
7122 (node, reason, requested, free_mem),
7127 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7128 """Checks if nodes have enough free disk space in all the VGs.
7130 This function checks if all given nodes have the needed amount of
7131 free disk. In case any node has less disk or we cannot get the
7132 information from the node, this function raises an OpPrereqError
7135 @type lu: C{LogicalUnit}
7136 @param lu: a logical unit from which we get configuration data
7137 @type nodenames: C{list}
7138 @param nodenames: the list of node names to check
7139 @type req_sizes: C{dict}
7140 @param req_sizes: the hash of vg and corresponding amount of disk in
7142 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7143 or we cannot check the node
7146 for vg, req_size in req_sizes.items():
7147 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7150 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7151 """Checks if nodes have enough free disk space in the specified VG.
7153 This function checks if all given nodes have the needed amount of
7154 free disk. In case any node has less disk or we cannot get the
7155 information from the node, this function raises an OpPrereqError
7158 @type lu: C{LogicalUnit}
7159 @param lu: a logical unit from which we get configuration data
7160 @type nodenames: C{list}
7161 @param nodenames: the list of node names to check
7163 @param vg: the volume group to check
7164 @type requested: C{int}
7165 @param requested: the amount of disk in MiB to check for
7166 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7167 or we cannot check the node
7170 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7171 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7172 for node in nodenames:
7173 info = nodeinfo[node]
7174 info.Raise("Cannot get current information from node %s" % node,
7175 prereq=True, ecode=errors.ECODE_ENVIRON)
7176 (_, (vg_info, ), _) = info.payload
7177 vg_free = vg_info.get("vg_free", None)
7178 if not isinstance(vg_free, int):
7179 raise errors.OpPrereqError("Can't compute free disk space on node"
7180 " %s for vg %s, result was '%s'" %
7181 (node, vg, vg_free), errors.ECODE_ENVIRON)
7182 if requested > vg_free:
7183 raise errors.OpPrereqError("Not enough disk space on target node %s"
7184 " vg %s: required %d MiB, available %d MiB" %
7185 (node, vg, requested, vg_free),
7189 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7190 """Checks if nodes have enough physical CPUs
7192 This function checks if all given nodes have the needed number of
7193 physical CPUs. In case any node has less CPUs or we cannot get the
7194 information from the node, this function raises an OpPrereqError
7197 @type lu: C{LogicalUnit}
7198 @param lu: a logical unit from which we get configuration data
7199 @type nodenames: C{list}
7200 @param nodenames: the list of node names to check
7201 @type requested: C{int}
7202 @param requested: the minimum acceptable number of physical CPUs
7203 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7204 or we cannot check the node
7207 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7208 for node in nodenames:
7209 info = nodeinfo[node]
7210 info.Raise("Cannot get current information from node %s" % node,
7211 prereq=True, ecode=errors.ECODE_ENVIRON)
7212 (_, _, (hv_info, )) = info.payload
7213 num_cpus = hv_info.get("cpu_total", None)
7214 if not isinstance(num_cpus, int):
7215 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7216 " on node %s, result was '%s'" %
7217 (node, num_cpus), errors.ECODE_ENVIRON)
7218 if requested > num_cpus:
7219 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7220 "required" % (node, num_cpus, requested),
7224 class LUInstanceStartup(LogicalUnit):
7225 """Starts an instance.
7228 HPATH = "instance-start"
7229 HTYPE = constants.HTYPE_INSTANCE
7232 def CheckArguments(self):
7234 if self.op.beparams:
7235 # fill the beparams dict
7236 objects.UpgradeBeParams(self.op.beparams)
7237 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7239 def ExpandNames(self):
7240 self._ExpandAndLockInstance()
7241 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7243 def DeclareLocks(self, level):
7244 if level == locking.LEVEL_NODE_RES:
7245 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7247 def BuildHooksEnv(self):
7250 This runs on master, primary and secondary nodes of the instance.
7254 "FORCE": self.op.force,
7257 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7261 def BuildHooksNodes(self):
7262 """Build hooks nodes.
7265 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7268 def CheckPrereq(self):
7269 """Check prerequisites.
7271 This checks that the instance is in the cluster.
7274 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7275 assert self.instance is not None, \
7276 "Cannot retrieve locked instance %s" % self.op.instance_name
7279 if self.op.hvparams:
7280 # check hypervisor parameter syntax (locally)
7281 cluster = self.cfg.GetClusterInfo()
7282 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7283 filled_hvp = cluster.FillHV(instance)
7284 filled_hvp.update(self.op.hvparams)
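      # validate the merged parameters (cluster defaults plus the per-start
      # override) both syntactically and on all of the instance's nodes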
7285 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7286 hv_type.CheckParameterSyntax(filled_hvp)
7287 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7289 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7291 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7293 if self.primary_offline and self.op.ignore_offline_nodes:
7294 self.LogWarning("Ignoring offline primary node")
7296 if self.op.hvparams or self.op.beparams:
7297 self.LogWarning("Overridden parameters are ignored")
7299 _CheckNodeOnline(self, instance.primary_node)
7301 bep = self.cfg.GetClusterInfo().FillBE(instance)
7302 bep.update(self.op.beparams)
7304 # check bridges existence
7305 _CheckInstanceBridgesExist(self, instance)
7307 remote_info = self.rpc.call_instance_info(instance.primary_node,
7309 instance.hypervisor)
7310 remote_info.Raise("Error checking node %s" % instance.primary_node,
7311 prereq=True, ecode=errors.ECODE_ENVIRON)
7312 if not remote_info.payload: # not running already
7313 _CheckNodeFreeMemory(self, instance.primary_node,
7314 "starting instance %s" % instance.name,
7315 bep[constants.BE_MINMEM], instance.hypervisor)
7317 def Exec(self, feedback_fn):
7318 """Start the instance.
7321 instance = self.instance
7322 force = self.op.force
7324 if not self.op.no_remember:
7325 self.cfg.MarkInstanceUp(instance.name)
7327 if self.primary_offline:
7328 assert self.op.ignore_offline_nodes
7329 self.LogInfo("Primary node offline, marked instance as started")
7331 node_current = instance.primary_node
7333 _StartInstanceDisks(self, instance, force)
7336 self.rpc.call_instance_start(node_current,
7337 (instance, self.op.hvparams,
7339 self.op.startup_paused)
7340 msg = result.fail_msg
7342 _ShutdownInstanceDisks(self, instance)
7343 raise errors.OpExecError("Could not start instance: %s" % msg)
7346 class LUInstanceReboot(LogicalUnit):
7347 """Reboot an instance.
7350 HPATH = "instance-reboot"
7351 HTYPE = constants.HTYPE_INSTANCE
7354 def ExpandNames(self):
7355 self._ExpandAndLockInstance()
7357 def BuildHooksEnv(self):
7360 This runs on master, primary and secondary nodes of the instance.
7364 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7365 "REBOOT_TYPE": self.op.reboot_type,
7366 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7369 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7373 def BuildHooksNodes(self):
7374 """Build hooks nodes.
7377 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7380 def CheckPrereq(self):
7381 """Check prerequisites.
7383 This checks that the instance is in the cluster.
7386 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7387 assert self.instance is not None, \
7388 "Cannot retrieve locked instance %s" % self.op.instance_name
7389 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7390 _CheckNodeOnline(self, instance.primary_node)
7392 # check bridges existence
7393 _CheckInstanceBridgesExist(self, instance)
7395 def Exec(self, feedback_fn):
7396 """Reboot the instance.
7399 instance = self.instance
7400 ignore_secondaries = self.op.ignore_secondaries
7401 reboot_type = self.op.reboot_type
7403 remote_info = self.rpc.call_instance_info(instance.primary_node,
7405 instance.hypervisor)
7406 remote_info.Raise("Error checking node %s" % instance.primary_node)
7407 instance_running = bool(remote_info.payload)
7409 node_current = instance.primary_node
7411 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7412 constants.INSTANCE_REBOOT_HARD]:
7413 for disk in instance.disks:
7414 self.cfg.SetDiskID(disk, node_current)
7415 result = self.rpc.call_instance_reboot(node_current, instance,
7417 self.op.shutdown_timeout)
7418 result.Raise("Could not reboot instance")
7420 if instance_running:
7421 result = self.rpc.call_instance_shutdown(node_current, instance,
7422 self.op.shutdown_timeout)
7423 result.Raise("Could not shutdown instance for full reboot")
7424 _ShutdownInstanceDisks(self, instance)
7426 self.LogInfo("Instance %s was already stopped, starting now",
7428 _StartInstanceDisks(self, instance, ignore_secondaries)
7429 result = self.rpc.call_instance_start(node_current,
7430 (instance, None, None), False)
7431 msg = result.fail_msg
7433 _ShutdownInstanceDisks(self, instance)
7434 raise errors.OpExecError("Could not start instance for"
7435 " full reboot: %s" % msg)
7437 self.cfg.MarkInstanceUp(instance.name)
7440 class LUInstanceShutdown(LogicalUnit):
7441 """Shutdown an instance.
7444 HPATH = "instance-stop"
7445 HTYPE = constants.HTYPE_INSTANCE
7448 def ExpandNames(self):
7449 self._ExpandAndLockInstance()
7451 def BuildHooksEnv(self):
7454 This runs on master, primary and secondary nodes of the instance.
7457 env = _BuildInstanceHookEnvByObject(self, self.instance)
7458 env["TIMEOUT"] = self.op.timeout
7461 def BuildHooksNodes(self):
7462 """Build hooks nodes.
7465 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7468 def CheckPrereq(self):
7469 """Check prerequisites.
7471 This checks that the instance is in the cluster.
7474 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7475 assert self.instance is not None, \
7476 "Cannot retrieve locked instance %s" % self.op.instance_name
7478 if not self.op.force:
7479 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7481 self.LogWarning("Ignoring offline instance check")
7483 self.primary_offline = \
7484 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7486 if self.primary_offline and self.op.ignore_offline_nodes:
7487 self.LogWarning("Ignoring offline primary node")
7489 _CheckNodeOnline(self, self.instance.primary_node)
7491 def Exec(self, feedback_fn):
7492 """Shutdown the instance.
7495 instance = self.instance
7496 node_current = instance.primary_node
7497 timeout = self.op.timeout
7499 # If the instance is offline we shouldn't mark it as down, as that
7500 # resets the offline flag.
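# Example: an instance already marked ADMINST_OFFLINE is left untouched
# here, while an instance whose admin state is in INSTANCE_ONLINE gets
# recorded as down before the shutdown RPC below is issued.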
7501 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7502 self.cfg.MarkInstanceDown(instance.name)
7504 if self.primary_offline:
7505 assert self.op.ignore_offline_nodes
7506 self.LogInfo("Primary node offline, marked instance as stopped")
7508 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7509 msg = result.fail_msg
7510 if msg:
7511 self.LogWarning("Could not shutdown instance: %s", msg)
7513 _ShutdownInstanceDisks(self, instance)
7516 class LUInstanceReinstall(LogicalUnit):
7517 """Reinstall an instance.
7520 HPATH = "instance-reinstall"
7521 HTYPE = constants.HTYPE_INSTANCE
7524 def ExpandNames(self):
7525 self._ExpandAndLockInstance()
7527 def BuildHooksEnv(self):
7530 This runs on master, primary and secondary nodes of the instance.
7533 return _BuildInstanceHookEnvByObject(self, self.instance)
7535 def BuildHooksNodes(self):
7536 """Build hooks nodes.
7539 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7542 def CheckPrereq(self):
7543 """Check prerequisites.
7545 This checks that the instance is in the cluster and is not running.
7548 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7549 assert instance is not None, \
7550 "Cannot retrieve locked instance %s" % self.op.instance_name
7551 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7552 " offline, cannot reinstall")
7554 if instance.disk_template == constants.DT_DISKLESS:
7555 raise errors.OpPrereqError("Instance '%s' has no disks" %
7556 self.op.instance_name,
7558 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7560 if self.op.os_type is not None:
7562 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7563 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7564 instance_os = self.op.os_type
7566 instance_os = instance.os
7568 nodelist = list(instance.all_nodes)
7570 if self.op.osparams:
7571 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7572 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7573 self.os_inst = i_osdict # the new dict (without defaults)
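# Rough illustration only (the merge itself lives in _GetUpdatedParams, not
# shown here): with self.op.osparams = {"mirror_url": "..."} (a hypothetical
# key), os_inst ends up holding the instance's existing osparams with that
# key overridden, and _CheckOSParams then validates the merged dict against
# the OS definition on every node of the instance.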
7577 self.instance = instance
7579 def Exec(self, feedback_fn):
7580 """Reinstall the instance.
7583 inst = self.instance
7585 if self.op.os_type is not None:
7586 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7587 inst.os = self.op.os_type
7588 # Write to configuration
7589 self.cfg.Update(inst, feedback_fn)
7591 _StartInstanceDisks(self, inst, None)
7593 feedback_fn("Running the instance OS create scripts...")
7594 # FIXME: pass debug option from opcode to backend
7595 result = self.rpc.call_instance_os_add(inst.primary_node,
7596 (inst, self.os_inst), True,
7597 self.op.debug_level)
7598 result.Raise("Could not install OS for instance %s on node %s" %
7599 (inst.name, inst.primary_node))
7601 _ShutdownInstanceDisks(self, inst)
7604 class LUInstanceRecreateDisks(LogicalUnit):
7605 """Recreate an instance's missing disks.
7608 HPATH = "instance-recreate-disks"
7609 HTYPE = constants.HTYPE_INSTANCE
7612 _MODIFYABLE = compat.UniqueFrozenset([
7613 constants.IDISK_SIZE,
7614 constants.IDISK_MODE,
7617 # New or changed disk parameters may have different semantics
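# In practice only the size and the access mode of a disk may be changed
# while recreating it; adoption, VG/metavg and provider settings must keep
# their original values, and the assert below ensures every IDISK_* parameter
# has been explicitly classified one way or the other.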
7618 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7619 constants.IDISK_ADOPT,
7621 # TODO: Implement support changing VG while recreating
7623 constants.IDISK_METAVG,
7624 constants.IDISK_PROVIDER,
7627 def _RunAllocator(self):
7628 """Run the allocator based on input opcode.
7631 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7634 # The allocator should actually run in "relocate" mode, but current
7635 # allocators don't support relocating all the nodes of an instance at
7636 # the same time. As a workaround we use "allocate" mode, but this is
7637 # suboptimal for two reasons:
7638 # - The instance name passed to the allocator is present in the list of
7639 # existing instances, so there could be a conflict within the
7640 # internal structures of the allocator. This doesn't happen with the
7641 # current allocators, but it's a liability.
7642 # - The allocator counts the resources used by the instance twice: once
7643 # because the instance exists already, and once because it tries to
7644 # allocate a new instance.
7645 # The allocator could choose some of the nodes on which the instance is
7646 # running, but that's not a problem. If the instance nodes are broken,
7647 # they should already be marked as drained or offline, and hence
7648 # skipped by the allocator. If instance disks have been lost for other
7649 # reasons, then recreating the disks on the same nodes should be fine.
7650 disk_template = self.instance.disk_template
7651 spindle_use = be_full[constants.BE_SPINDLE_USE]
7652 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7653 disk_template=disk_template,
7654 tags=list(self.instance.GetTags()),
7655 os=self.instance.os,
7657 vcpus=be_full[constants.BE_VCPUS],
7658 memory=be_full[constants.BE_MAXMEM],
7659 spindle_use=spindle_use,
7660 disks=[{constants.IDISK_SIZE: d.size,
7661 constants.IDISK_MODE: d.mode}
7662 for d in self.instance.disks],
7663 hypervisor=self.instance.hypervisor,
7664 node_whitelist=None)
7665 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7667 ial.Run(self.op.iallocator)
7669 assert req.RequiredNodes() == len(self.instance.all_nodes)
7672 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7673 " %s" % (self.op.iallocator, ial.info),
7676 self.op.nodes = ial.result
7677 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7678 self.op.instance_name, self.op.iallocator,
7679 utils.CommaJoin(ial.result))
7681 def CheckArguments(self):
7682 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7683 # Normalize and convert deprecated list of disk indices
7684 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
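# Example: a deprecated request such as disks=[2, 0, 2] is normalized to
# [(0, {}), (2, {})], i.e. each unique index paired with an empty parameter
# dict.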
7686 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7688 raise errors.OpPrereqError("Some disks have been specified more than"
7689 " once: %s" % utils.CommaJoin(duplicates),
7692 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7693 # when neither iallocator nor nodes are specified
7694 if self.op.iallocator or self.op.nodes:
7695 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7697 for (idx, params) in self.op.disks:
7698 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7699 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7701 raise errors.OpPrereqError("Parameters for disk %s try to change"
7702 " unmodifiable parameter(s): %s" %
7703 (idx, utils.CommaJoin(unsupported)),
7706 def ExpandNames(self):
7707 self._ExpandAndLockInstance()
7708 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7711 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7712 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7714 self.needed_locks[locking.LEVEL_NODE] = []
7715 if self.op.iallocator:
7716 # iallocator will select a new node in the same group
7717 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7718 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7720 self.needed_locks[locking.LEVEL_NODE_RES] = []
7722 def DeclareLocks(self, level):
7723 if level == locking.LEVEL_NODEGROUP:
7724 assert self.op.iallocator is not None
7725 assert not self.op.nodes
7726 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7727 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7728 # Lock the primary group used by the instance optimistically; this
7729 # requires going via the node before it's locked, requiring
7730 # verification later on
7731 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7732 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7734 elif level == locking.LEVEL_NODE:
7735 # If an allocator is used, then we lock all the nodes in the current
7736 # instance group, as we don't know yet which ones will be selected;
7737 # if we replace the nodes without using an allocator, locks are
7738 # already declared in ExpandNames; otherwise, we need to lock all the
7739 # instance nodes for disk re-creation
7740 if self.op.iallocator:
7741 assert not self.op.nodes
7742 assert not self.needed_locks[locking.LEVEL_NODE]
7743 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7745 # Lock member nodes of the group of the primary node
7746 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7747 self.needed_locks[locking.LEVEL_NODE].extend(
7748 self.cfg.GetNodeGroup(group_uuid).members)
7750 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7751 elif not self.op.nodes:
7752 self._LockInstancesNodes(primary_only=False)
7753 elif level == locking.LEVEL_NODE_RES:
7755 self.needed_locks[locking.LEVEL_NODE_RES] = \
7756 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7758 def BuildHooksEnv(self):
7761 This runs on master, primary and secondary nodes of the instance.
7764 return _BuildInstanceHookEnvByObject(self, self.instance)
7766 def BuildHooksNodes(self):
7767 """Build hooks nodes.
7770 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7773 def CheckPrereq(self):
7774 """Check prerequisites.
7776 This checks that the instance is in the cluster and is not running.
7779 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7780 assert instance is not None, \
7781 "Cannot retrieve locked instance %s" % self.op.instance_name
7783 if len(self.op.nodes) != len(instance.all_nodes):
7784 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7785 " %d replacement nodes were specified" %
7786 (instance.name, len(instance.all_nodes),
7787 len(self.op.nodes)),
7789 assert instance.disk_template != constants.DT_DRBD8 or \
7790 len(self.op.nodes) == 2
7791 assert instance.disk_template != constants.DT_PLAIN or \
7792 len(self.op.nodes) == 1
7793 primary_node = self.op.nodes[0]
7795 primary_node = instance.primary_node
7796 if not self.op.iallocator:
7797 _CheckNodeOnline(self, primary_node)
7799 if instance.disk_template == constants.DT_DISKLESS:
7800 raise errors.OpPrereqError("Instance '%s' has no disks" %
7801 self.op.instance_name, errors.ECODE_INVAL)
7803 # Verify if node group locks are still correct
7804 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7806 # Node group locks are acquired only for the primary node (and only
7807 # when the allocator is used)
7808 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7811 # if we replace nodes *and* the old primary is offline, we don't
7812 # check the instance state
7813 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7814 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7815 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7816 msg="cannot recreate disks")
7819 self.disks = dict(self.op.disks)
7821 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7823 maxidx = max(self.disks.keys())
7824 if maxidx >= len(instance.disks):
7825 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7828 if ((self.op.nodes or self.op.iallocator) and
7829 sorted(self.disks.keys()) != range(len(instance.disks))):
7830 raise errors.OpPrereqError("Can't recreate disks partially and"
7831 " change the nodes at the same time",
7834 self.instance = instance
7836 if self.op.iallocator:
7837 self._RunAllocator()
7838 # Release unneeded node and node resource locks
7839 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7840 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7841 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7843 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7845 def Exec(self, feedback_fn):
7846 """Recreate the disks.
7849 instance = self.instance
7851 assert (self.owned_locks(locking.LEVEL_NODE) ==
7852 self.owned_locks(locking.LEVEL_NODE_RES))
7855 mods = [] # keeps track of needed changes
7857 for idx, disk in enumerate(instance.disks):
7859 changes = self.disks[idx]
7861 # Disk should not be recreated
7865 # update secondaries for disks, if needed
7866 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7867 # need to update the nodes and minors
7868 assert len(self.op.nodes) == 2
7869 assert len(disk.logical_id) == 6 # otherwise disk internals
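# A DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); only the nodes and the
# minors are replaced here, the port and the shared secret are kept.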
7871 (_, _, old_port, _, _, old_secret) = disk.logical_id
7872 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7873 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7874 new_minors[0], new_minors[1], old_secret)
7875 assert len(disk.logical_id) == len(new_id)
7879 mods.append((idx, new_id, changes))
7881 # now that we have passed all asserts above, we can apply the mods
7882 # in a single run (to avoid partial changes)
7883 for idx, new_id, changes in mods:
7884 disk = instance.disks[idx]
7885 if new_id is not None:
7886 assert disk.dev_type == constants.LD_DRBD8
7887 disk.logical_id = new_id
7889 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7890 mode=changes.get(constants.IDISK_MODE, None))
7892 # change primary node, if needed
7894 instance.primary_node = self.op.nodes[0]
7895 self.LogWarning("Changing the instance's nodes, you will have to"
7896 " remove any disks left on the older nodes manually")
7899 self.cfg.Update(instance, feedback_fn)
7901 # All touched nodes must be locked
7902 mylocks = self.owned_locks(locking.LEVEL_NODE)
7903 assert mylocks.issuperset(frozenset(instance.all_nodes))
7904 _CreateDisks(self, instance, to_skip=to_skip)
7907 class LUInstanceRename(LogicalUnit):
7908 """Rename an instance.
7911 HPATH = "instance-rename"
7912 HTYPE = constants.HTYPE_INSTANCE
7914 def CheckArguments(self):
7918 if self.op.ip_check and not self.op.name_check:
7919 # TODO: make the ip check more flexible and not depend on the name check
7920 raise errors.OpPrereqError("IP address check requires a name check",
7923 def BuildHooksEnv(self):
7926 This runs on master, primary and secondary nodes of the instance.
7929 env = _BuildInstanceHookEnvByObject(self, self.instance)
7930 env["INSTANCE_NEW_NAME"] = self.op.new_name
7933 def BuildHooksNodes(self):
7934 """Build hooks nodes.
7937 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7940 def CheckPrereq(self):
7941 """Check prerequisites.
7943 This checks that the instance is in the cluster and is not running.
7946 self.op.instance_name = _ExpandInstanceName(self.cfg,
7947 self.op.instance_name)
7948 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7949 assert instance is not None
7950 _CheckNodeOnline(self, instance.primary_node)
7951 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7952 msg="cannot rename")
7953 self.instance = instance
7955 new_name = self.op.new_name
7956 if self.op.name_check:
7957 hostname = _CheckHostnameSane(self, new_name)
7958 new_name = self.op.new_name = hostname.name
7959 if (self.op.ip_check and
7960 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7961 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7962 (hostname.ip, new_name),
7963 errors.ECODE_NOTUNIQUE)
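# The TCP ping against the node daemon port is a best-effort probe of the
# new name's IP: if anything answers, the rename is refused rather than
# risking an address clash later on.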
7965 instance_list = self.cfg.GetInstanceList()
7966 if new_name in instance_list and new_name != instance.name:
7967 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7968 new_name, errors.ECODE_EXISTS)
7970 def Exec(self, feedback_fn):
7971 """Rename the instance.
7974 inst = self.instance
7975 old_name = inst.name
7977 rename_file_storage = False
7978 if (inst.disk_template in constants.DTS_FILEBASED and
7979 self.op.new_name != inst.name):
7980 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7981 rename_file_storage = True
7983 self.cfg.RenameInstance(inst.name, self.op.new_name)
7984 # Change the instance lock. This is definitely safe while we hold the BGL.
7985 # Otherwise the new lock would have to be added in acquired mode.
7987 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7988 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7989 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7991 # re-read the instance from the configuration after rename
7992 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7994 if rename_file_storage:
7995 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7996 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7997 old_file_storage_dir,
7998 new_file_storage_dir)
7999 result.Raise("Could not rename on node %s directory '%s' to '%s'"
8000 " (but the instance has been renamed in Ganeti)" %
8001 (inst.primary_node, old_file_storage_dir,
8002 new_file_storage_dir))
8004 _StartInstanceDisks(self, inst, None)
8005 # update info on disks
8006 info = _GetInstanceInfoText(inst)
8007 for (idx, disk) in enumerate(inst.disks):
8008 for node in inst.all_nodes:
8009 self.cfg.SetDiskID(disk, node)
8010 result = self.rpc.call_blockdev_setinfo(node, disk, info)
8012 self.LogWarning("Error setting info on node %s for disk %s: %s",
8013 node, idx, result.fail_msg)
8015 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
8016 old_name, self.op.debug_level)
8017 msg = result.fail_msg
8018 if msg:
8019 msg = ("Could not run OS rename script for instance %s on node %s"
8020 " (but the instance has been renamed in Ganeti): %s" %
8021 (inst.name, inst.primary_node, msg))
8022 self.LogWarning(msg)
8024 _ShutdownInstanceDisks(self, inst)
8029 class LUInstanceRemove(LogicalUnit):
8030 """Remove an instance.
8033 HPATH = "instance-remove"
8034 HTYPE = constants.HTYPE_INSTANCE
8037 def ExpandNames(self):
8038 self._ExpandAndLockInstance()
8039 self.needed_locks[locking.LEVEL_NODE] = []
8040 self.needed_locks[locking.LEVEL_NODE_RES] = []
8041 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8043 def DeclareLocks(self, level):
8044 if level == locking.LEVEL_NODE:
8045 self._LockInstancesNodes()
8046 elif level == locking.LEVEL_NODE_RES:
8048 self.needed_locks[locking.LEVEL_NODE_RES] = \
8049 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8051 def BuildHooksEnv(self):
8054 This runs on master, primary and secondary nodes of the instance.
8057 env = _BuildInstanceHookEnvByObject(self, self.instance)
8058 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8061 def BuildHooksNodes(self):
8062 """Build hooks nodes.
8065 nl = [self.cfg.GetMasterNode()]
8066 nl_post = list(self.instance.all_nodes) + nl
8067 return (nl, nl_post)
8069 def CheckPrereq(self):
8070 """Check prerequisites.
8072 This checks that the instance is in the cluster.
8075 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8076 assert self.instance is not None, \
8077 "Cannot retrieve locked instance %s" % self.op.instance_name
8079 def Exec(self, feedback_fn):
8080 """Remove the instance.
8083 instance = self.instance
8084 logging.info("Shutting down instance %s on node %s",
8085 instance.name, instance.primary_node)
8087 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8088 self.op.shutdown_timeout)
8089 msg = result.fail_msg
8090 if msg:
8091 if self.op.ignore_failures:
8092 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8093 else:
8094 raise errors.OpExecError("Could not shutdown instance %s on"
8095 " node %s: %s" %
8096 (instance.name, instance.primary_node, msg))
8098 assert (self.owned_locks(locking.LEVEL_NODE) ==
8099 self.owned_locks(locking.LEVEL_NODE_RES))
8100 assert not (set(instance.all_nodes) -
8101 self.owned_locks(locking.LEVEL_NODE)), \
8102 "Not owning correct locks"
8104 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8107 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8108 """Utility function to remove an instance.
8111 logging.info("Removing block devices for instance %s", instance.name)
8113 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8114 if not ignore_failures:
8115 raise errors.OpExecError("Can't remove instance's disks")
8116 feedback_fn("Warning: can't remove instance's disks")
8118 logging.info("Removing instance %s out of cluster config", instance.name)
8120 lu.cfg.RemoveInstance(instance.name)
8122 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8123 "Instance lock removal conflict"
8125 # Remove lock for the instance
8126 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8129 class LUInstanceQuery(NoHooksLU):
8130 """Logical unit for querying instances.
8133 # pylint: disable=W0142
8136 def CheckArguments(self):
8137 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8138 self.op.output_fields, self.op.use_locking)
8140 def ExpandNames(self):
8141 self.iq.ExpandNames(self)
8143 def DeclareLocks(self, level):
8144 self.iq.DeclareLocks(self, level)
8146 def Exec(self, feedback_fn):
8147 return self.iq.OldStyleQuery(self)
8150 def _ExpandNamesForMigration(lu):
8151 """Expands names for use with L{TLMigrateInstance}.
8153 @type lu: L{LogicalUnit}
8156 if lu.op.target_node is not None:
8157 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8159 lu.needed_locks[locking.LEVEL_NODE] = []
8160 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8162 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8163 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8165 # The node allocation lock is actually only needed for externally replicated
8166 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
8167 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8170 def _DeclareLocksForMigration(lu, level):
8171 """Declares locks for L{TLMigrateInstance}.
8173 @type lu: L{LogicalUnit}
8174 @param level: Lock level
8177 if level == locking.LEVEL_NODE_ALLOC:
8178 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8180 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8182 # Node locks are already declared here rather than at LEVEL_NODE as we need
8183 # the instance object anyway to declare the node allocation lock.
8184 if instance.disk_template in constants.DTS_EXT_MIRROR:
8185 if lu.op.target_node is None:
8186 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8187 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8189 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8191 del lu.recalculate_locks[locking.LEVEL_NODE]
8193 lu._LockInstancesNodes() # pylint: disable=W0212
8195 elif level == locking.LEVEL_NODE:
8196 # Node locks are declared together with the node allocation lock
8197 assert (lu.needed_locks[locking.LEVEL_NODE] or
8198 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8200 elif level == locking.LEVEL_NODE_RES:
8202 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8203 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8206 class LUInstanceFailover(LogicalUnit):
8207 """Failover an instance.
8210 HPATH = "instance-failover"
8211 HTYPE = constants.HTYPE_INSTANCE
8214 def CheckArguments(self):
8215 """Check the arguments.
8218 self.iallocator = getattr(self.op, "iallocator", None)
8219 self.target_node = getattr(self.op, "target_node", None)
8221 def ExpandNames(self):
8222 self._ExpandAndLockInstance()
8223 _ExpandNamesForMigration(self)
8226 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8227 self.op.ignore_consistency, True,
8228 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8230 self.tasklets = [self._migrater]
8232 def DeclareLocks(self, level):
8233 _DeclareLocksForMigration(self, level)
8235 def BuildHooksEnv(self):
8238 This runs on master, primary and secondary nodes of the instance.
8241 instance = self._migrater.instance
8242 source_node = instance.primary_node
8243 target_node = self.op.target_node
8245 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8246 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8247 "OLD_PRIMARY": source_node,
8248 "NEW_PRIMARY": target_node,
8251 if instance.disk_template in constants.DTS_INT_MIRROR:
8252 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8253 env["NEW_SECONDARY"] = source_node
8255 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8257 env.update(_BuildInstanceHookEnvByObject(self, instance))
8261 def BuildHooksNodes(self):
8262 """Build hooks nodes.
8265 instance = self._migrater.instance
8266 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8267 return (nl, nl + [instance.primary_node])
8270 class LUInstanceMigrate(LogicalUnit):
8271 """Migrate an instance.
8273 This is migration without shutting down, compared to the failover,
8274 which is done with shutdown.
8277 HPATH = "instance-migrate"
8278 HTYPE = constants.HTYPE_INSTANCE
8281 def ExpandNames(self):
8282 self._ExpandAndLockInstance()
8283 _ExpandNamesForMigration(self)
8286 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8287 False, self.op.allow_failover, False,
8288 self.op.allow_runtime_changes,
8289 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8290 self.op.ignore_ipolicy)
8292 self.tasklets = [self._migrater]
8294 def DeclareLocks(self, level):
8295 _DeclareLocksForMigration(self, level)
8297 def BuildHooksEnv(self):
8300 This runs on master, primary and secondary nodes of the instance.
8303 instance = self._migrater.instance
8304 source_node = instance.primary_node
8305 target_node = self.op.target_node
8306 env = _BuildInstanceHookEnvByObject(self, instance)
8308 "MIGRATE_LIVE": self._migrater.live,
8309 "MIGRATE_CLEANUP": self.op.cleanup,
8310 "OLD_PRIMARY": source_node,
8311 "NEW_PRIMARY": target_node,
8312 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8315 if instance.disk_template in constants.DTS_INT_MIRROR:
8316 env["OLD_SECONDARY"] = target_node
8317 env["NEW_SECONDARY"] = source_node
8319 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8323 def BuildHooksNodes(self):
8324 """Build hooks nodes.
8327 instance = self._migrater.instance
8328 snodes = list(instance.secondary_nodes)
8329 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8333 class LUInstanceMove(LogicalUnit):
8334 """Move an instance by data-copying.
8337 HPATH = "instance-move"
8338 HTYPE = constants.HTYPE_INSTANCE
8341 def ExpandNames(self):
8342 self._ExpandAndLockInstance()
8343 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8344 self.op.target_node = target_node
8345 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8346 self.needed_locks[locking.LEVEL_NODE_RES] = []
8347 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8349 def DeclareLocks(self, level):
8350 if level == locking.LEVEL_NODE:
8351 self._LockInstancesNodes(primary_only=True)
8352 elif level == locking.LEVEL_NODE_RES:
8354 self.needed_locks[locking.LEVEL_NODE_RES] = \
8355 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8357 def BuildHooksEnv(self):
8360 This runs on master, primary and secondary nodes of the instance.
8364 "TARGET_NODE": self.op.target_node,
8365 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8367 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8370 def BuildHooksNodes(self):
8371 """Build hooks nodes.
8375 self.cfg.GetMasterNode(),
8376 self.instance.primary_node,
8377 self.op.target_node,
8381 def CheckPrereq(self):
8382 """Check prerequisites.
8384 This checks that the instance is in the cluster.
8387 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8388 assert self.instance is not None, \
8389 "Cannot retrieve locked instance %s" % self.op.instance_name
8391 if instance.disk_template not in constants.DTS_COPYABLE:
8392 raise errors.OpPrereqError("Disk template %s not suitable for copying" %
8393 instance.disk_template, errors.ECODE_STATE)
8395 node = self.cfg.GetNodeInfo(self.op.target_node)
8396 assert node is not None, \
8397 "Cannot retrieve locked node %s" % self.op.target_node
8399 self.target_node = target_node = node.name
8401 if target_node == instance.primary_node:
8402 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8403 (instance.name, target_node),
8406 bep = self.cfg.GetClusterInfo().FillBE(instance)
8408 for idx, dsk in enumerate(instance.disks):
8409 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8410 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8411 " cannot copy" % idx, errors.ECODE_STATE)
8413 _CheckNodeOnline(self, target_node)
8414 _CheckNodeNotDrained(self, target_node)
8415 _CheckNodeVmCapable(self, target_node)
8416 cluster = self.cfg.GetClusterInfo()
8417 group_info = self.cfg.GetNodeGroup(node.group)
8418 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8419 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
8420 ignore=self.op.ignore_ipolicy)
8422 if instance.admin_state == constants.ADMINST_UP:
8423 # check memory requirements on the secondary node
8424 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8425 instance.name, bep[constants.BE_MAXMEM],
8426 instance.hypervisor)
8428 self.LogInfo("Not checking memory on the secondary node as"
8429 " instance will not be started")
8431 # check bridge existence
8432 _CheckInstanceBridgesExist(self, instance, node=target_node)
8434 def Exec(self, feedback_fn):
8435 """Move an instance.
8437 The move is done by shutting it down on its present node, copying
8438 the data over (slow) and starting it on the new node.
8441 instance = self.instance
8443 source_node = instance.primary_node
8444 target_node = self.target_node
8446 self.LogInfo("Shutting down instance %s on source node %s",
8447 instance.name, source_node)
8449 assert (self.owned_locks(locking.LEVEL_NODE) ==
8450 self.owned_locks(locking.LEVEL_NODE_RES))
8452 result = self.rpc.call_instance_shutdown(source_node, instance,
8453 self.op.shutdown_timeout)
8454 msg = result.fail_msg
8455 if msg:
8456 if self.op.ignore_consistency:
8457 self.LogWarning("Could not shutdown instance %s on node %s."
8458 " Proceeding anyway. Please make sure node"
8459 " %s is down. Error details: %s",
8460 instance.name, source_node, source_node, msg)
8461 else:
8462 raise errors.OpExecError("Could not shutdown instance %s on"
8463 " node %s: %s" %
8464 (instance.name, source_node, msg))
8466 # create the target disks
8468 _CreateDisks(self, instance, target_node=target_node)
8469 except errors.OpExecError:
8470 self.LogWarning("Device creation failed")
8471 self.cfg.ReleaseDRBDMinors(instance.name)
8474 cluster_name = self.cfg.GetClusterInfo().cluster_name
8477 # activate, get path, copy the data over
8478 for idx, disk in enumerate(instance.disks):
8479 self.LogInfo("Copying data for disk %d", idx)
8480 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8481 instance.name, True, idx)
8483 self.LogWarning("Can't assemble newly created disk %d: %s",
8484 idx, result.fail_msg)
8485 errs.append(result.fail_msg)
8487 dev_path = result.payload
8488 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8489 target_node, dev_path,
8492 self.LogWarning("Can't copy data over for disk %d: %s",
8493 idx, result.fail_msg)
8494 errs.append(result.fail_msg)
8498 self.LogWarning("Some disks failed to copy, aborting")
8500 _RemoveDisks(self, instance, target_node=target_node)
8502 self.cfg.ReleaseDRBDMinors(instance.name)
8503 raise errors.OpExecError("Errors during disk copy: %s" %
8506 instance.primary_node = target_node
8507 self.cfg.Update(instance, feedback_fn)
8509 self.LogInfo("Removing the disks on the original node")
8510 _RemoveDisks(self, instance, target_node=source_node)
8512 # Only start the instance if it's marked as up
8513 if instance.admin_state == constants.ADMINST_UP:
8514 self.LogInfo("Starting instance %s on node %s",
8515 instance.name, target_node)
8517 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8518 ignore_secondaries=True)
8520 _ShutdownInstanceDisks(self, instance)
8521 raise errors.OpExecError("Can't activate the instance's disks")
8523 result = self.rpc.call_instance_start(target_node,
8524 (instance, None, None), False)
8525 msg = result.fail_msg
8526 if msg:
8527 _ShutdownInstanceDisks(self, instance)
8528 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8529 (instance.name, target_node, msg))
8532 class LUNodeMigrate(LogicalUnit):
8533 """Migrate all instances from a node.
8536 HPATH = "node-migrate"
8537 HTYPE = constants.HTYPE_NODE
8540 def CheckArguments(self):
8543 def ExpandNames(self):
8544 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8546 self.share_locks = _ShareAll()
8547 self.needed_locks = {
8548 locking.LEVEL_NODE: [self.op.node_name],
8551 def BuildHooksEnv(self):
8554 This runs on the master, the primary and all the secondaries.
8558 "NODE_NAME": self.op.node_name,
8559 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8562 def BuildHooksNodes(self):
8563 """Build hooks nodes.
8566 nl = [self.cfg.GetMasterNode()]
8569 def CheckPrereq(self):
8572 def Exec(self, feedback_fn):
8573 # Prepare jobs for migration instances
8574 allow_runtime_changes = self.op.allow_runtime_changes
8576 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8579 iallocator=self.op.iallocator,
8580 target_node=self.op.target_node,
8581 allow_runtime_changes=allow_runtime_changes,
8582 ignore_ipolicy=self.op.ignore_ipolicy)]
8583 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
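# One single-opcode job is created per primary instance of the node, so each
# instance is migrated by its own job and failures stay isolated.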
8585 # TODO: Run iallocator in this opcode and pass correct placement options to
8586 # OpInstanceMigrate. Since other jobs can modify the cluster between
8587 # running the iallocator and the actual migration, a good consistency model
8588 # will have to be found.
8590 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8591 frozenset([self.op.node_name]))
8593 return ResultWithJobs(jobs)
8596 class TLMigrateInstance(Tasklet):
8597 """Tasklet class for instance migration.
8600 @ivar live: whether the migration will be done live or non-live;
8601 this variable is initialized only after CheckPrereq has run
8602 @type cleanup: boolean
8603 @ivar cleanup: Whether we clean up from a failed migration
8604 @type iallocator: string
8605 @ivar iallocator: The iallocator used to determine target_node
8606 @type target_node: string
8607 @ivar target_node: If given, the target_node to reallocate the instance to
8608 @type failover: boolean
8609 @ivar failover: Whether operation results in failover or migration
8610 @type fallback: boolean
8611 @ivar fallback: Whether fallback to failover is allowed if migration not
8612 possible
8613 @type ignore_consistency: boolean
8614 @ivar ignore_consistency: Whether we should ignore consistency between source
8615 and target node
8616 @type shutdown_timeout: int
8617 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
8618 @type ignore_ipolicy: bool
8619 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8624 _MIGRATION_POLL_INTERVAL = 1 # seconds
8625 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8627 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8628 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8630 """Initializes this class.
8633 Tasklet.__init__(self, lu)
8636 self.instance_name = instance_name
8637 self.cleanup = cleanup
8638 self.live = False # will be overridden later
8639 self.failover = failover
8640 self.fallback = fallback
8641 self.ignore_consistency = ignore_consistency
8642 self.shutdown_timeout = shutdown_timeout
8643 self.ignore_ipolicy = ignore_ipolicy
8644 self.allow_runtime_changes = allow_runtime_changes
8646 def CheckPrereq(self):
8647 """Check prerequisites.
8649 This checks that the instance is in the cluster.
8652 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8653 instance = self.cfg.GetInstanceInfo(instance_name)
8654 assert instance is not None
8655 self.instance = instance
8656 cluster = self.cfg.GetClusterInfo()
8658 if (not self.cleanup and
8659 not instance.admin_state == constants.ADMINST_UP and
8660 not self.failover and self.fallback):
8661 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8662 " switching to failover")
8663 self.failover = True
8665 if instance.disk_template not in constants.DTS_MIRRORED:
8670 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8671 " %s" % (instance.disk_template, text),
8674 if instance.disk_template in constants.DTS_EXT_MIRROR:
8675 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8677 if self.lu.op.iallocator:
8678 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8679 self._RunAllocator()
8681 # We set self.target_node as it is required by
8683 self.target_node = self.lu.op.target_node
8685 # Check that the target node is correct in terms of instance policy
8686 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8687 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8688 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8690 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8691 ignore=self.ignore_ipolicy)
8693 # self.target_node is already populated, either directly or by the
8695 target_node = self.target_node
8696 if self.target_node == instance.primary_node:
8697 raise errors.OpPrereqError("Cannot migrate instance %s"
8698 " to its primary (%s)" %
8699 (instance.name, instance.primary_node),
8702 if len(self.lu.tasklets) == 1:
8703 # It is safe to release locks only when we're the only tasklet
8705 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8706 keep=[instance.primary_node, self.target_node])
8707 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8710 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8712 secondary_nodes = instance.secondary_nodes
8713 if not secondary_nodes:
8714 raise errors.ConfigurationError("No secondary node but using"
8715 " %s disk template" %
8716 instance.disk_template)
8717 target_node = secondary_nodes[0]
8718 if self.lu.op.iallocator or (self.lu.op.target_node and
8719 self.lu.op.target_node != target_node):
8721 text = "failed over"
8724 raise errors.OpPrereqError("Instances with disk template %s cannot"
8725 " be %s to arbitrary nodes"
8726 " (neither an iallocator nor a target"
8727 " node can be passed)" %
8728 (instance.disk_template, text),
8730 nodeinfo = self.cfg.GetNodeInfo(target_node)
8731 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8732 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8734 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8735 ignore=self.ignore_ipolicy)
8737 i_be = cluster.FillBE(instance)
8739 # check memory requirements on the secondary node
8740 if (not self.cleanup and
8741 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8742 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8743 "migrating instance %s" %
8745 i_be[constants.BE_MINMEM],
8746 instance.hypervisor)
8748 self.lu.LogInfo("Not checking memory on the secondary node as"
8749 " instance will not be started")
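# The check above uses BE_MINMEM on purpose: the instance only needs to fit
# on the target node with its minimum memory, as _ExecMigration may balloon
# the runtime memory down (when runtime changes are allowed) before the
# transfer.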
8751 # check if failover must be forced instead of migration
8752 if (not self.cleanup and not self.failover and
8753 i_be[constants.BE_ALWAYS_FAILOVER]):
8754 self.lu.LogInfo("Instance configured to always failover; fallback"
8756 self.failover = True
8758 # check bridge existence
8759 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8761 if not self.cleanup:
8762 _CheckNodeNotDrained(self.lu, target_node)
8763 if not self.failover:
8764 result = self.rpc.call_instance_migratable(instance.primary_node,
8766 if result.fail_msg and self.fallback:
8767 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8769 self.failover = True
8771 result.Raise("Can't migrate, please use failover",
8772 prereq=True, ecode=errors.ECODE_STATE)
8774 assert not (self.failover and self.cleanup)
8776 if not self.failover:
8777 if self.lu.op.live is not None and self.lu.op.mode is not None:
8778 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8779 " parameters are accepted",
8781 if self.lu.op.live is not None:
8783 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8785 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8786 # reset the 'live' parameter to None so that repeated
8787 # invocations of CheckPrereq do not raise an exception
8788 self.lu.op.live = None
8789 elif self.lu.op.mode is None:
8790 # read the default value from the hypervisor
8791 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8792 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8794 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8796 # Failover is never live
8799 if not (self.failover or self.cleanup):
8800 remote_info = self.rpc.call_instance_info(instance.primary_node,
8802 instance.hypervisor)
8803 remote_info.Raise("Error checking instance on node %s" %
8804 instance.primary_node)
8805 instance_running = bool(remote_info.payload)
8806 if instance_running:
8807 self.current_mem = int(remote_info.payload["memory"])
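# current_mem is later compared against tgt_free_mem in _ExecMigration; if
# the instance currently uses more memory than the target node offers, it is
# ballooned down first (or the migration aborts when runtime changes are not
# allowed).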
8809 def _RunAllocator(self):
8810 """Run the allocator based on input opcode.
8813 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8815 # FIXME: add a self.ignore_ipolicy option
8816 req = iallocator.IAReqRelocate(name=self.instance_name,
8817 relocate_from=[self.instance.primary_node])
8818 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8820 ial.Run(self.lu.op.iallocator)
8823 raise errors.OpPrereqError("Can't compute nodes using"
8824 " iallocator '%s': %s" %
8825 (self.lu.op.iallocator, ial.info),
8827 self.target_node = ial.result[0]
8828 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8829 self.instance_name, self.lu.op.iallocator,
8830 utils.CommaJoin(ial.result))
8832 def _WaitUntilSync(self):
8833 """Poll with custom rpc for disk sync.
8835 This uses our own step-based rpc call.
8838 self.feedback_fn("* wait until resync is done")
8842 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8844 (self.instance.disks,
8847 for node, nres in result.items():
8848 nres.Raise("Cannot resync disks on node %s" % node)
8849 node_done, node_percent = nres.payload
8850 all_done = all_done and node_done
8851 if node_percent is not None:
8852 min_percent = min(min_percent, node_percent)
8854 if min_percent < 100:
8855 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8858 def _EnsureSecondary(self, node):
8859 """Demote a node to secondary.
8862 self.feedback_fn("* switching node %s to secondary mode" % node)
8864 for dev in self.instance.disks:
8865 self.cfg.SetDiskID(dev, node)
8867 result = self.rpc.call_blockdev_close(node, self.instance.name,
8868 self.instance.disks)
8869 result.Raise("Cannot change disk to secondary on node %s" % node)
8871 def _GoStandalone(self):
8872 """Disconnect from the network.
8875 self.feedback_fn("* changing into standalone mode")
8876 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8877 self.instance.disks)
8878 for node, nres in result.items():
8879 nres.Raise("Cannot disconnect disks node %s" % node)
8881 def _GoReconnect(self, multimaster):
8882 """Reconnect to the network.
8888 msg = "single-master"
8889 self.feedback_fn("* changing disks into %s mode" % msg)
8890 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8891 (self.instance.disks, self.instance),
8892 self.instance.name, multimaster)
8893 for node, nres in result.items():
8894 nres.Raise("Cannot change disks config on node %s" % node)
8896 def _ExecCleanup(self):
8897 """Try to cleanup after a failed migration.
8899 The cleanup is done by:
8900 - check that the instance is running only on one node
8901 (and update the config if needed)
8902 - change disks on its secondary node to secondary
8903 - wait until disks are fully synchronized
8904 - disconnect from the network
8905 - change disks into single-master mode
8906 - wait again until disks are fully synchronized
8909 instance = self.instance
8910 target_node = self.target_node
8911 source_node = self.source_node
8913 # check running on only one node
8914 self.feedback_fn("* checking where the instance actually runs"
8915 " (if this hangs, the hypervisor might be in"
8916 " a bad state)")
8917 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8918 for node, result in ins_l.items():
8919 result.Raise("Can't contact node %s" % node)
8921 runningon_source = instance.name in ins_l[source_node].payload
8922 runningon_target = instance.name in ins_l[target_node].payload
8924 if runningon_source and runningon_target:
8925 raise errors.OpExecError("Instance seems to be running on two nodes,"
8926 " or the hypervisor is confused; you will have"
8927 " to ensure manually that it runs only on one"
8928 " and restart this operation")
8930 if not (runningon_source or runningon_target):
8931 raise errors.OpExecError("Instance does not seem to be running at all;"
8932 " in this case it's safer to repair by"
8933 " running 'gnt-instance stop' to ensure disk"
8934 " shutdown, and then restarting it")
8936 if runningon_target:
8937 # the migration has actually succeeded, we need to update the config
8938 self.feedback_fn("* instance running on secondary node (%s),"
8939 " updating config" % target_node)
8940 instance.primary_node = target_node
8941 self.cfg.Update(instance, self.feedback_fn)
8942 demoted_node = source_node
8944 self.feedback_fn("* instance confirmed to be running on its"
8945 " primary node (%s)" % source_node)
8946 demoted_node = target_node
8948 if instance.disk_template in constants.DTS_INT_MIRROR:
8949 self._EnsureSecondary(demoted_node)
8951 self._WaitUntilSync()
8952 except errors.OpExecError:
8953 # we ignore here errors, since if the device is standalone, it
8954 # won't be able to sync
8956 self._GoStandalone()
8957 self._GoReconnect(False)
8958 self._WaitUntilSync()
8960 self.feedback_fn("* done")
8962 def _RevertDiskStatus(self):
8963 """Try to revert the disk status after a failed migration.
8966 target_node = self.target_node
8967 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8971 self._EnsureSecondary(target_node)
8972 self._GoStandalone()
8973 self._GoReconnect(False)
8974 self._WaitUntilSync()
8975 except errors.OpExecError, err:
8976 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8977 " please try to recover the instance manually;"
8978 " error '%s'" % str(err))
8980 def _AbortMigration(self):
8981 """Call the hypervisor code to abort a started migration.
8984 instance = self.instance
8985 target_node = self.target_node
8986 source_node = self.source_node
8987 migration_info = self.migration_info
8989 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8993 abort_msg = abort_result.fail_msg
8994 if abort_msg:
8995 logging.error("Aborting migration failed on target node %s: %s",
8996 target_node, abort_msg)
8997 # Don't raise an exception here, as we still have to try to revert the
8998 # disk status, even if this step failed.
9000 abort_result = self.rpc.call_instance_finalize_migration_src(
9001 source_node, instance, False, self.live)
9002 abort_msg = abort_result.fail_msg
9003 if abort_msg:
9004 logging.error("Aborting migration failed on source node %s: %s",
9005 source_node, abort_msg)
9007 def _ExecMigration(self):
9008 """Migrate an instance.
9010 The migrate is done by:
9011 - change the disks into dual-master mode
9012 - wait until disks are fully synchronized again
9013 - migrate the instance
9014 - change disks on the new secondary node (the old primary) to secondary
9015 - wait until disks are fully synchronized
9016 - change disks into single-master mode
9019 instance = self.instance
9020 target_node = self.target_node
9021 source_node = self.source_node
9023 # Check for hypervisor version mismatch and warn the user.
9024 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9025 None, [self.instance.hypervisor], False)
9026 for ninfo in nodeinfo.values():
9027 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9029 (_, _, (src_info, )) = nodeinfo[source_node].payload
9030 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9032 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9033 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9034 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9035 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9036 if src_version != dst_version:
9037 self.feedback_fn("* warning: hypervisor version mismatch between"
9038 " source (%s) and target (%s) node" %
9039 (src_version, dst_version))
9041 self.feedback_fn("* checking disk consistency between source and target")
9042 for (idx, dev) in enumerate(instance.disks):
9043 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9044 raise errors.OpExecError("Disk %s is degraded or not fully"
9045 " synchronized on target node,"
9046 " aborting migration" % idx)
9048 if self.current_mem > self.tgt_free_mem:
9049 if not self.allow_runtime_changes:
9050 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9051 " free memory to fit instance %s on target"
9052 " node %s (have %dMB, need %dMB)" %
9053 (instance.name, target_node,
9054 self.tgt_free_mem, self.current_mem))
9055 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9056 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9059 rpcres.Raise("Cannot modify instance runtime memory")
9061 # First get the migration information from the remote node
9062 result = self.rpc.call_migration_info(source_node, instance)
9063 msg = result.fail_msg
9064 if msg:
9065 log_err = ("Failed fetching source migration information from %s: %s" %
9066 (source_node, msg))
9067 logging.error(log_err)
9068 raise errors.OpExecError(log_err)
9070 self.migration_info = migration_info = result.payload
9072 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9073 # Then switch the disks to master/master mode
9074 self._EnsureSecondary(target_node)
9075 self._GoStandalone()
9076 self._GoReconnect(True)
9077 self._WaitUntilSync()
9079 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9080 result = self.rpc.call_accept_instance(target_node,
9083 self.nodes_ip[target_node])
9085 msg = result.fail_msg
9086 if msg:
9087 logging.error("Instance pre-migration failed, trying to revert"
9088 " disk status: %s", msg)
9089 self.feedback_fn("Pre-migration failed, aborting")
9090 self._AbortMigration()
9091 self._RevertDiskStatus()
9092 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9093 (instance.name, msg))
9095 self.feedback_fn("* migrating instance to %s" % target_node)
9096 result = self.rpc.call_instance_migrate(source_node, instance,
9097 self.nodes_ip[target_node],
9099 msg = result.fail_msg
9100 if msg:
9101 logging.error("Instance migration failed, trying to revert"
9102 " disk status: %s", msg)
9103 self.feedback_fn("Migration failed, aborting")
9104 self._AbortMigration()
9105 self._RevertDiskStatus()
9106 raise errors.OpExecError("Could not migrate instance %s: %s" %
9107 (instance.name, msg))
9109 self.feedback_fn("* starting memory transfer")
9110 last_feedback = time.time()
9112 result = self.rpc.call_instance_get_migration_status(source_node,
9114 msg = result.fail_msg
9115 ms = result.payload # MigrationStatus instance
9116 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9117 logging.error("Instance migration failed, trying to revert"
9118 " disk status: %s", msg)
9119 self.feedback_fn("Migration failed, aborting")
9120 self._AbortMigration()
9121 self._RevertDiskStatus()
9122 if not msg:
9123 msg = "hypervisor returned failure"
9124 raise errors.OpExecError("Could not migrate instance %s: %s" %
9125 (instance.name, msg))
9127 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9128 self.feedback_fn("* memory transfer complete")
9131 if (utils.TimeoutExpired(last_feedback,
9132 self._MIGRATION_FEEDBACK_INTERVAL) and
9133 ms.transferred_ram is not None):
9134 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9135 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9136 last_feedback = time.time()
9138 time.sleep(self._MIGRATION_POLL_INTERVAL)
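# The status loop above polls every _MIGRATION_POLL_INTERVAL seconds and
# emits progress feedback at most every _MIGRATION_FEEDBACK_INTERVAL seconds.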
9140 result = self.rpc.call_instance_finalize_migration_src(source_node,
9144 msg = result.fail_msg
9145 if msg:
9146 logging.error("Instance migration succeeded, but finalization failed"
9147 " on the source node: %s", msg)
9148 raise errors.OpExecError("Could not finalize instance migration: %s" %
9151 instance.primary_node = target_node
9153 # distribute new instance config to the other nodes
9154 self.cfg.Update(instance, self.feedback_fn)
9156 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9160 msg = result.fail_msg
9161 if msg:
9162 logging.error("Instance migration succeeded, but finalization failed"
9163 " on the target node: %s", msg)
9164 raise errors.OpExecError("Could not finalize instance migration: %s" %
9167 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9168 self._EnsureSecondary(source_node)
9169 self._WaitUntilSync()
9170 self._GoStandalone()
9171 self._GoReconnect(False)
9172 self._WaitUntilSync()
9174 # If the instance's disk template is `rbd' or `ext' and there was a
9175 # successful migration, unmap the device from the source node.
9176 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9177 disks = _ExpandCheckDisks(instance, instance.disks)
9178 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9180 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9181 msg = result.fail_msg
9182 if msg:
9183 logging.error("Migration was successful, but couldn't unmap the"
9184 " block device %s on source node %s: %s",
9185 disk.iv_name, source_node, msg)
9186 logging.error("You need to unmap the device %s manually on %s",
9187 disk.iv_name, source_node)
9189 self.feedback_fn("* done")
9191 def _ExecFailover(self):
9192 """Failover an instance.
9194 The failover is done by shutting it down on its present node and
9195 starting it on the secondary.
9198 instance = self.instance
9199 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9201 source_node = instance.primary_node
9202 target_node = self.target_node
9204 if instance.admin_state == constants.ADMINST_UP:
9205 self.feedback_fn("* checking disk consistency between source and target")
9206 for (idx, dev) in enumerate(instance.disks):
9207 # for drbd, these are drbd over lvm
9208 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9210 if primary_node.offline:
9211 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9213 (primary_node.name, idx, target_node))
9214 elif not self.ignore_consistency:
9215 raise errors.OpExecError("Disk %s is degraded on target node,"
9216 " aborting failover" % idx)
9218 self.feedback_fn("* not checking disk consistency as instance is not"
9221 self.feedback_fn("* shutting down instance on source node")
9222 logging.info("Shutting down instance %s on node %s",
9223 instance.name, source_node)
9225 result = self.rpc.call_instance_shutdown(source_node, instance,
9226 self.shutdown_timeout)
9227 msg = result.fail_msg
9228 if msg:
9229 if self.ignore_consistency or primary_node.offline:
9230 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9231 " proceeding anyway; please make sure node"
9232 " %s is down; error details: %s",
9233 instance.name, source_node, source_node, msg)
9234 else:
9235 raise errors.OpExecError("Could not shutdown instance %s on"
9236 " node %s: %s" %
9237 (instance.name, source_node, msg))
9239 self.feedback_fn("* deactivating the instance's disks on source node")
9240 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9241 raise errors.OpExecError("Can't shut down the instance's disks")
9243 instance.primary_node = target_node
9244 # distribute new instance config to the other nodes
9245 self.cfg.Update(instance, self.feedback_fn)
9247 # Only start the instance if it's marked as up
9248 if instance.admin_state == constants.ADMINST_UP:
9249 self.feedback_fn("* activating the instance's disks on target node %s" %
9251 logging.info("Starting instance %s on node %s",
9252 instance.name, target_node)
9254 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9255 ignore_secondaries=True)
9257 _ShutdownInstanceDisks(self.lu, instance)
9258 raise errors.OpExecError("Can't activate the instance's disks")
9260 self.feedback_fn("* starting the instance on the target node %s" %
9262 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9264 msg = result.fail_msg
9266 _ShutdownInstanceDisks(self.lu, instance)
9267 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9268 (instance.name, target_node, msg))
9270 def Exec(self, feedback_fn):
9271 """Perform the migration.
9274 self.feedback_fn = feedback_fn
9275 self.source_node = self.instance.primary_node
9277 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9278 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9279 self.target_node = self.instance.secondary_nodes[0]
9280 # Otherwise self.target_node has been populated either
9281 # directly, or through an iallocator.
9283 self.all_nodes = [self.source_node, self.target_node]
9284 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9285 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9288 feedback_fn("Failover instance %s" % self.instance.name)
9289 self._ExecFailover()
9291 feedback_fn("Migrating instance %s" % self.instance.name)
9294 return self._ExecCleanup()
9296 return self._ExecMigration()
9299 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9301 """Wrapper around L{_CreateBlockDevInner}.
9303 This method annotates the root device first.
9306 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9307 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9308 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9309 force_open, excl_stor)
9312 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9313 info, force_open, excl_stor):
9314 """Create a tree of block devices on a given node.
9316 If this device type has to be created on secondaries, create it and all its children.
9319 If not, just recurse to children keeping the same 'force' value.
9321 @attention: The device has to be annotated already.
9323 @param lu: the lu on whose behalf we execute
9324 @param node: the node on which to create the device
9325 @type instance: L{objects.Instance}
9326 @param instance: the instance which owns the device
9327 @type device: L{objects.Disk}
9328 @param device: the device to create
9329 @type force_create: boolean
9330 @param force_create: whether to force creation of this device; this
9331 will be changed to True whenever we find a device whose
9332 CreateOnSecondary() method returns True
9333 @param info: the extra 'metadata' we should attach to the device
9334 (this will be represented as a LVM tag)
9335 @type force_open: boolean
9336 @param force_open: this parameter will be passed to the
9337 L{backend.BlockdevCreate} function where it specifies
9338 whether we run on primary or not, and it affects both
9339 the child assembly and the device's own Open() execution
9340 @type excl_stor: boolean
9341 @param excl_stor: Whether exclusive_storage is active for the node
9343 @return: list of created devices
9345 created_devices = []
9347 if device.CreateOnSecondary():
9351 for child in device.children:
9352 devs = _CreateBlockDevInner(lu, node, instance, child, force_create,
9353 info, force_open, excl_stor)
9354 created_devices.extend(devs)
9356 if not force_create:
9357 return created_devices
9359 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9361 # The device has been completely created, so there is no point in keeping
9362 # its subdevices in the list. We just add the device itself instead.
9363 created_devices = [(node, device)]
9364 return created_devices
9366 except errors.DeviceCreationError, e:
9367 e.created_devices.extend(created_devices)
9369 except errors.OpExecError, e:
9370 raise errors.DeviceCreationError(str(e), created_devices)
9373 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9375 """Create a single block device on a given node.
9377 This will not recurse over children of the device, so they must be
9380 @param lu: the lu on whose behalf we execute
9381 @param node: the node on which to create the device
9382 @type instance: L{objects.Instance}
9383 @param instance: the instance which owns the device
9384 @type device: L{objects.Disk}
9385 @param device: the device to create
9386 @param info: the extra 'metadata' we should attach to the device
9387 (this will be represented as a LVM tag)
9388 @type force_open: boolean
9389 @param force_open: this parameter will be passed to the
9390 L{backend.BlockdevCreate} function where it specifies
9391 whether we run on primary or not, and it affects both
9392 the child assembly and the device's own Open() execution
9393 @type excl_stor: boolean
9394 @param excl_stor: Whether exclusive_storage is active for the node
9397 lu.cfg.SetDiskID(device, node)
9398 result = lu.rpc.call_blockdev_create(node, device, device.size,
9399 instance.name, force_open, info,
9401 result.Raise("Can't create block device %s on"
9402 " node %s for instance %s" % (device, node, instance.name))
9403 if device.physical_id is None:
9404 device.physical_id = result.payload
9407 def _GenerateUniqueNames(lu, exts):
9408 """Generate a suitable LV name.
9410 This will generate a logical volume name for the given instance.
9415 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9416 results.append("%s%s" % (new_id, val))
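# Illustrative sketch (hypothetical values): for exts like [".disk0", ".disk1"]
# and a generated unique ID such as "a1b2c3d4-5678", the resulting names are
# "a1b2c3d4-5678.disk0" and "a1b2c3d4-5678.disk1"; the DRBD branch of
# _GenerateDiskTemplate below further suffixes these with "_data" and "_meta".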
9420 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9421 iv_name, p_minor, s_minor):
9422 """Generate a drbd8 device complete with its children.
9425 assert len(vgnames) == len(names) == 2
9426 port = lu.cfg.AllocatePort()
9427 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9429 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9430 logical_id=(vgnames[0], names[0]),
9432 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9433 size=constants.DRBD_META_SIZE,
9434 logical_id=(vgnames[1], names[1]),
9436 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9437 logical_id=(primary, secondary, port,
9440 children=[dev_data, dev_meta],
9441 iv_name=iv_name, params={})
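# Resulting layout (sketch of the return value): a single LD_DRBD8 disk whose
# two LV children hold the instance data (C{size} MiB) and the DRBD metadata
# (constants.DRBD_META_SIZE MiB), with the logical_id carrying both node
# names, the allocated port, the two minors and the generated shared secret.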
9445 _DISK_TEMPLATE_NAME_PREFIX = {
9446 constants.DT_PLAIN: "",
9447 constants.DT_RBD: ".rbd",
9448 constants.DT_EXT: ".ext",
9452 _DISK_TEMPLATE_DEVICE_TYPE = {
9453 constants.DT_PLAIN: constants.LD_LV,
9454 constants.DT_FILE: constants.LD_FILE,
9455 constants.DT_SHARED_FILE: constants.LD_FILE,
9456 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9457 constants.DT_RBD: constants.LD_RBD,
9458 constants.DT_EXT: constants.LD_EXT,
9462 def _GenerateDiskTemplate(
9463 lu, template_name, instance_name, primary_node, secondary_nodes,
9464 disk_info, file_storage_dir, file_driver, base_index,
9465 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9466 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9467 """Generate the entire disk layout for a given template type.
9470 vgname = lu.cfg.GetVGName()
9471 disk_count = len(disk_info)
9474 if template_name == constants.DT_DISKLESS:
9476 elif template_name == constants.DT_DRBD8:
9477 if len(secondary_nodes) != 1:
9478 raise errors.ProgrammerError("Wrong template configuration")
9479 remote_node = secondary_nodes[0]
9480 minors = lu.cfg.AllocateDRBDMinor(
9481 [primary_node, remote_node] * len(disk_info), instance_name)
9483 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9485 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9488 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9489 for i in range(disk_count)]):
9490 names.append(lv_prefix + "_data")
9491 names.append(lv_prefix + "_meta")
9492 for idx, disk in enumerate(disk_info):
9493 disk_index = idx + base_index
9494 data_vg = disk.get(constants.IDISK_VG, vgname)
9495 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9496 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9497 disk[constants.IDISK_SIZE],
9499 names[idx * 2:idx * 2 + 2],
9500 "disk/%d" % disk_index,
9501 minors[idx * 2], minors[idx * 2 + 1])
9502 disk_dev.mode = disk[constants.IDISK_MODE]
9503 disks.append(disk_dev)
9506 raise errors.ProgrammerError("Wrong template configuration")
9508 if template_name == constants.DT_FILE:
9510 elif template_name == constants.DT_SHARED_FILE:
9511 _req_shr_file_storage()
9513 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9514 if name_prefix is None:
9517 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9518 (name_prefix, base_index + i)
9519 for i in range(disk_count)])
9521 if template_name == constants.DT_PLAIN:
9523 def logical_id_fn(idx, _, disk):
9524 vg = disk.get(constants.IDISK_VG, vgname)
9525 return (vg, names[idx])
9527 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9529 lambda _, disk_index, disk: (file_driver,
9530 "%s/disk%d" % (file_storage_dir,
9532 elif template_name == constants.DT_BLOCK:
9534 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9535 disk[constants.IDISK_ADOPT])
9536 elif template_name == constants.DT_RBD:
9537 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9538 elif template_name == constants.DT_EXT:
9539 def logical_id_fn(idx, _, disk):
9540 provider = disk.get(constants.IDISK_PROVIDER, None)
9541 if provider is None:
9542 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9543 " not found", constants.DT_EXT,
9544 constants.IDISK_PROVIDER)
9545 return (provider, names[idx])
9547 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9549 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9551 for idx, disk in enumerate(disk_info):
9553 # Only for the Ext template add disk_info to params
9554 if template_name == constants.DT_EXT:
9555 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9557 if key not in constants.IDISK_PARAMS:
9558 params[key] = disk[key]
9559 disk_index = idx + base_index
9560 size = disk[constants.IDISK_SIZE]
9561 feedback_fn("* disk %s, size %s" %
9562 (disk_index, utils.FormatUnit(size, "h")))
9563 disks.append(objects.Disk(dev_type=dev_type, size=size,
9564 logical_id=logical_id_fn(idx, disk_index, disk),
9565 iv_name="disk/%d" % disk_index,
9566 mode=disk[constants.IDISK_MODE],
9572 def _GetInstanceInfoText(instance):
9573 """Compute that text that should be added to the disk's metadata.
9576 return "originstname+%s" % instance.name
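# Example (hypothetical instance name): for an instance called "web1" this
# returns "originstname+web1", which later ends up as the LVM tag attached to
# the instance's volumes.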
9579 def _CalcEta(time_taken, written, total_size):
9580 """Calculates the ETA based on size written and total size.
9582 @param time_taken: The time taken so far
9583 @param written: amount written so far
9584 @param total_size: The total size of data to be written
9585 @return: The remaining time in seconds
9588 avg_time = time_taken / float(written)
9589 return (total_size - written) * avg_time
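# Worked example (hypothetical numbers): if 512 MiB of a 2048 MiB disk were
# written in 30 seconds, the average is 30 / 512.0 seconds per MiB, so
# _CalcEta(30, 512, 2048) == (2048 - 512) * (30 / 512.0) == 90.0 seconds left.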
9592 def _WipeDisks(lu, instance, disks=None):
9593 """Wipes instance disks.
9595 @type lu: L{LogicalUnit}
9596 @param lu: the logical unit on whose behalf we execute
9597 @type instance: L{objects.Instance}
9598 @param instance: the instance whose disks we should wipe
9599 @return: the success of the wipe
9602 node = instance.primary_node
9605 disks = [(idx, disk, 0)
9606 for (idx, disk) in enumerate(instance.disks)]
9608 for (_, device, _) in disks:
9609 lu.cfg.SetDiskID(device, node)
9611 logging.info("Pausing synchronization of disks of instance '%s'",
9613 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9614 (map(compat.snd, disks),
9617 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9619 for idx, success in enumerate(result.payload):
9621 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9622 " failed", idx, instance.name)
9625 for (idx, device, offset) in disks:
9626 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but at
9627 # most MAX_WIPE_CHUNK. Truncate to an integer to avoid rounding errors.
9629 int(min(constants.MAX_WIPE_CHUNK,
9630 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
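# Sketch of the chunk-size choice (the constant values are assumptions, see
# constants.py): with MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024
# MiB, a 5000 MiB disk is wiped in min(1024, 500) = 500 MiB chunks, while
# very large disks are capped at 1024 MiB per call_blockdev_wipe call.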
9634 start_time = time.time()
9639 info_text = (" (from %s to %s)" %
9640 (utils.FormatUnit(offset, "h"),
9641 utils.FormatUnit(size, "h")))
9643 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9645 logging.info("Wiping disk %d for instance %s on node %s using"
9646 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9648 while offset < size:
9649 wipe_size = min(wipe_chunk_size, size - offset)
9651 logging.debug("Wiping disk %d, offset %s, chunk %s",
9652 idx, offset, wipe_size)
9654 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9656 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9657 (idx, offset, wipe_size))
9661 if now - last_output >= 60:
9662 eta = _CalcEta(now - start_time, offset, size)
9663 lu.LogInfo(" - done: %.1f%% ETA: %s",
9664 offset / float(size) * 100, utils.FormatSeconds(eta))
9667 logging.info("Resuming synchronization of disks for instance '%s'",
9670 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9671 (map(compat.snd, disks),
9676 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9677 node, result.fail_msg)
9679 for idx, success in enumerate(result.payload):
9681 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9682 " failed", idx, instance.name)
9685 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9686 """Create all disks for an instance.
9688 This abstracts away some work from AddInstance.
9690 @type lu: L{LogicalUnit}
9691 @param lu: the logical unit on whose behalf we execute
9692 @type instance: L{objects.Instance}
9693 @param instance: the instance whose disks we should create
9695 @param to_skip: list of indices to skip
9696 @type target_node: string
9697 @param target_node: if passed, overrides the target node for creation
9699 @return: the success of the creation
9702 info = _GetInstanceInfoText(instance)
9703 if target_node is None:
9704 pnode = instance.primary_node
9705 all_nodes = instance.all_nodes
9710 if instance.disk_template in constants.DTS_FILEBASED:
9711 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9712 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9714 result.Raise("Failed to create directory '%s' on"
9715 " node %s" % (file_storage_dir, pnode))
9718 # Note: this needs to be kept in sync with adding of disks in
9719 # LUInstanceSetParams
9720 for idx, device in enumerate(instance.disks):
9721 if to_skip and idx in to_skip:
9723 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9725 for node in all_nodes:
9726 f_create = node == pnode
9728 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9729 disks_created.append((node, device))
9730 except errors.OpExecError:
9731 logging.warning("Creating disk %s for instance '%s' failed",
9733 except errors.DeviceCreationError, e:
9734 logging.warning("Creating disk %s for instance '%s' failed",
9736 disks_created.extend(e.created_devices)
9737 for (node, disk) in disks_created:
9738 lu.cfg.SetDiskID(disk, node)
9739 result = lu.rpc.call_blockdev_remove(node, disk)
9741 logging.warning("Failed to remove newly-created disk %s on node %s:"
9742 " %s", device, node, result.fail_msg)
9743 raise errors.OpExecError(e.message)
9746 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9747 """Remove all disks for an instance.
9749 This abstracts away some work from `AddInstance()` and
9750 `RemoveInstance()`. Note that in case some of the devices couldn't
9751 be removed, the removal will continue with the other ones.
9753 @type lu: L{LogicalUnit}
9754 @param lu: the logical unit on whose behalf we execute
9755 @type instance: L{objects.Instance}
9756 @param instance: the instance whose disks we should remove
9757 @type target_node: string
9758 @param target_node: used to override the node on which to remove the disks
9760 @return: the success of the removal
9763 logging.info("Removing block devices for instance %s", instance.name)
9766 ports_to_release = set()
9767 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9768 for (idx, device) in enumerate(anno_disks):
9770 edata = [(target_node, device)]
9772 edata = device.ComputeNodeTree(instance.primary_node)
9773 for node, disk in edata:
9774 lu.cfg.SetDiskID(disk, node)
9775 result = lu.rpc.call_blockdev_remove(node, disk)
9777 lu.LogWarning("Could not remove disk %s on node %s,"
9778 " continuing anyway: %s", idx, node, result.fail_msg)
9779 if not (result.offline and node != instance.primary_node):
9782 # if this is a DRBD disk, return its port to the pool
9783 if device.dev_type in constants.LDS_DRBD:
9784 ports_to_release.add(device.logical_id[2])
9786 if all_result or ignore_failures:
9787 for port in ports_to_release:
9788 lu.cfg.AddTcpUdpPort(port)
9790 if instance.disk_template in constants.DTS_FILEBASED:
9791 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9795 tgt = instance.primary_node
9796 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9798 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9799 file_storage_dir, instance.primary_node, result.fail_msg)
9805 def _ComputeDiskSizePerVG(disk_template, disks):
9806 """Compute disk size requirements in the volume group
9809 def _compute(disks, payload):
9810 """Universal algorithm.
9815 vgs[disk[constants.IDISK_VG]] = \
9816 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9820 # Required free disk space per volume group, as a function of the disk template
9822 constants.DT_DISKLESS: {},
9823 constants.DT_PLAIN: _compute(disks, 0),
9824 # 128 MB are added for drbd metadata for each disk
9825 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9826 constants.DT_FILE: {},
9827 constants.DT_SHARED_FILE: {},
9830 if disk_template not in req_size_dict:
9831 raise errors.ProgrammerError("Disk template '%s' size requirement"
9832 " is unknown" % disk_template)
9834 return req_size_dict[disk_template]
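# Example (hypothetical request): two DRBD8 disks of 1024 and 2048 MiB in
# volume group "xenvg" yield {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE},
# i.e. each disk also reserves room for its metadata volume; diskless and
# file-based templates return an empty dict.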
9837 def _FilterVmNodes(lu, nodenames):
9838 """Filters out non-vm_capable nodes from a list.
9840 @type lu: L{LogicalUnit}
9841 @param lu: the logical unit for which we check
9842 @type nodenames: list
9843 @param nodenames: the list of nodes on which we should check
9845 @return: the list of vm-capable nodes
9848 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9849 return [name for name in nodenames if name not in vm_nodes]
9852 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9853 """Hypervisor parameter validation.
9855 This function abstracts the hypervisor parameter validation to be
9856 used in both instance create and instance modify.
9858 @type lu: L{LogicalUnit}
9859 @param lu: the logical unit for which we check
9860 @type nodenames: list
9861 @param nodenames: the list of nodes on which we should check
9862 @type hvname: string
9863 @param hvname: the name of the hypervisor we should use
9864 @type hvparams: dict
9865 @param hvparams: the parameters which we need to check
9866 @raise errors.OpPrereqError: if the parameters are not valid
9869 nodenames = _FilterVmNodes(lu, nodenames)
9871 cluster = lu.cfg.GetClusterInfo()
9872 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9874 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9875 for node in nodenames:
9879 info.Raise("Hypervisor parameter validation failed on node %s" % node)
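# Typical usage (as done later in LUInstanceCreate.CheckPrereq): the caller
# passes the candidate node names, the hypervisor name and the instance-level
# hvparams, e.g.
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# and relies on the RPC result's Raise() call to report invalid parameters.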
9882 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9883 """OS parameters validation.
9885 @type lu: L{LogicalUnit}
9886 @param lu: the logical unit for which we check
9887 @type required: boolean
9888 @param required: whether the validation should fail if the OS is not found
9890 @type nodenames: list
9891 @param nodenames: the list of nodes on which we should check
9892 @type osname: string
9893 @param osname: the name of the OS we should check
9894 @type osparams: dict
9895 @param osparams: the parameters which we need to check
9896 @raise errors.OpPrereqError: if the parameters are not valid
9899 nodenames = _FilterVmNodes(lu, nodenames)
9900 result = lu.rpc.call_os_validate(nodenames, required, osname,
9901 [constants.OS_VALIDATE_PARAMETERS],
9903 for node, nres in result.items():
9904 # we don't check for offline cases since this should be run only
9905 # against the master node and/or an instance's nodes
9906 nres.Raise("OS Parameters validation failed on node %s" % node)
9907 if not nres.payload:
9908 lu.LogInfo("OS %s not found on node %s, validation skipped",
9912 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9913 """Wrapper around IAReqInstanceAlloc.
9915 @param op: The instance opcode
9916 @param disks: The computed disks
9917 @param nics: The computed nics
9918 @param beparams: The full filled beparams
9919 @param node_whitelist: List of nodes which should appear as online to the
9920 allocator (unless the node is already marked offline)
9922 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9925 spindle_use = beparams[constants.BE_SPINDLE_USE]
9926 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9927 disk_template=op.disk_template,
9930 vcpus=beparams[constants.BE_VCPUS],
9931 memory=beparams[constants.BE_MAXMEM],
9932 spindle_use=spindle_use,
9934 nics=[n.ToDict() for n in nics],
9935 hypervisor=op.hypervisor,
9936 node_whitelist=node_whitelist)
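# Minimal sketch of how this request is consumed (mirroring _RunAllocator in
# LUInstanceCreate further below):
#   req = _CreateInstanceAllocRequest(self.op, self.disks, self.nics,
#                                     self.be_full, node_whitelist)
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)
#   ial.Run(self.op.iallocator)
# after which ial.result holds the chosen node name(s).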
9939 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9940 """Computes the nics.
9942 @param op: The instance opcode
9943 @param cluster: Cluster configuration object
9944 @param default_ip: The default ip to assign
9945 @param cfg: An instance of the configuration object
9946 @param ec_id: Execution context ID
9948 @returns: The built NIC objects
9953 nic_mode_req = nic.get(constants.INIC_MODE, None)
9954 nic_mode = nic_mode_req
9955 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9956 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9958 net = nic.get(constants.INIC_NETWORK, None)
9959 link = nic.get(constants.NIC_LINK, None)
9960 ip = nic.get(constants.INIC_IP, None)
9962 if net is None or net.lower() == constants.VALUE_NONE:
9965 if nic_mode_req is not None or link is not None:
9966 raise errors.OpPrereqError("If network is given, no mode or link"
9967 " is allowed to be passed",
9970 # ip validity checks
9971 if ip is None or ip.lower() == constants.VALUE_NONE:
9973 elif ip.lower() == constants.VALUE_AUTO:
9974 if not op.name_check:
9975 raise errors.OpPrereqError("IP address set to auto but name checks"
9976 " have been skipped",
9980 # We defer pool operations until later, so that the iallocator has
9981 # filled in the instance's node(s)
9982 if ip.lower() == constants.NIC_IP_POOL:
9984 raise errors.OpPrereqError("if ip=pool, parameter network"
9985 " must be passed too",
9988 elif not netutils.IPAddress.IsValid(ip):
9989 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9994 # TODO: check the ip address for uniqueness
9995 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9996 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9999 # MAC address verification
10000 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
10001 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10002 mac = utils.NormalizeAndValidateMac(mac)
10005 # TODO: We need to factor this out
10006 cfg.ReserveMAC(mac, ec_id)
10007 except errors.ReservationError:
10008 raise errors.OpPrereqError("MAC address %s already in use"
10009 " in cluster" % mac,
10010 errors.ECODE_NOTUNIQUE)
10012 # Build nic parameters
10015 nicparams[constants.NIC_MODE] = nic_mode
10017 nicparams[constants.NIC_LINK] = link
10019 check_params = cluster.SimpleFillNIC(nicparams)
10020 objects.NIC.CheckParameterSyntax(check_params)
10021 net_uuid = cfg.LookupNetwork(net)
10022 nics.append(objects.NIC(mac=mac, ip=nic_ip,
10023 network=net_uuid, nicparams=nicparams))
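# Example result (hypothetical opcode values): a NIC request such as
#   {constants.INIC_IP: "auto", constants.INIC_MODE: constants.VALUE_AUTO}
# becomes an objects.NIC whose mode falls back to the cluster default, whose
# IP is the default_ip resolved by the name check, and whose MAC stays "auto"
# until it is generated later in CheckPrereq.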
10028 def _ComputeDisks(op, default_vg):
10029 """Computes the instance disks.
10031 @param op: The instance opcode
10032 @param default_vg: The default volume group to assume
10034 @return: The computed disks
10038 for disk in op.disks:
10039 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
10040 if mode not in constants.DISK_ACCESS_SET:
10041 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
10042 mode, errors.ECODE_INVAL)
10043 size = disk.get(constants.IDISK_SIZE, None)
10045 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
10048 except (TypeError, ValueError):
10049 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
10050 errors.ECODE_INVAL)
10052 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10053 if ext_provider and op.disk_template != constants.DT_EXT:
10054 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10055 " disk template, not %s" %
10056 (constants.IDISK_PROVIDER, constants.DT_EXT,
10057 op.disk_template), errors.ECODE_INVAL)
10059 data_vg = disk.get(constants.IDISK_VG, default_vg)
10061 constants.IDISK_SIZE: size,
10062 constants.IDISK_MODE: mode,
10063 constants.IDISK_VG: data_vg,
10066 if constants.IDISK_METAVG in disk:
10067 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10068 if constants.IDISK_ADOPT in disk:
10069 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10071 # For extstorage, demand the `provider' option and add any
10072 # additional parameters (ext-params) to the dict
10073 if op.disk_template == constants.DT_EXT:
10075 new_disk[constants.IDISK_PROVIDER] = ext_provider
10077 if key not in constants.IDISK_PARAMS:
10078 new_disk[key] = disk[key]
10080 raise errors.OpPrereqError("Missing provider for template '%s'" %
10081 constants.DT_EXT, errors.ECODE_INVAL)
10083 disks.append(new_disk)
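# Example of a computed disk (hypothetical opcode input): a request of
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR}
# is normalized to
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: default_vg}
# with metavg/adopt/provider keys added only when present in the request.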
10088 def _ComputeFullBeParams(op, cluster):
10089 """Computes the full beparams.
10091 @param op: The instance opcode
10092 @param cluster: The cluster config object
10094 @return: The fully filled beparams
10097 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10098 for param, value in op.beparams.iteritems():
10099 if value == constants.VALUE_AUTO:
10100 op.beparams[param] = default_beparams[param]
10101 objects.UpgradeBeParams(op.beparams)
10102 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10103 return cluster.SimpleFillBE(op.beparams)
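# Example (hypothetical opcode values): if op.beparams is
#   {constants.BE_VCPUS: constants.VALUE_AUTO, constants.BE_MAXMEM: 2048}
# the VALUE_AUTO entry is first replaced by the cluster default, the dict is
# upgraded and type-checked, and SimpleFillBE then returns the cluster
# defaults overlaid with these explicit values.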
10106 def _CheckOpportunisticLocking(op):
10107 """Generate error if opportunistic locking is not possible.
10110 if op.opportunistic_locking and not op.iallocator:
10111 raise errors.OpPrereqError("Opportunistic locking is only available in"
10112 " combination with an instance allocator",
10113 errors.ECODE_INVAL)
10116 class LUInstanceCreate(LogicalUnit):
10117 """Create an instance.
10120 HPATH = "instance-add"
10121 HTYPE = constants.HTYPE_INSTANCE
10124 def CheckArguments(self):
10125 """Check arguments.
10128 # do not require name_check to ease forward/backward compatibility
10130 if self.op.no_install and self.op.start:
10131 self.LogInfo("No-installation mode selected, disabling startup")
10132 self.op.start = False
10133 # validate/normalize the instance name
10134 self.op.instance_name = \
10135 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10137 if self.op.ip_check and not self.op.name_check:
10138 # TODO: make the ip check more flexible and not depend on the name check
10139 raise errors.OpPrereqError("Cannot do IP address check without a name"
10140 " check", errors.ECODE_INVAL)
10142 # check nics' parameter names
10143 for nic in self.op.nics:
10144 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10146 # check disks. parameter names and consistent adopt/no-adopt strategy
10147 has_adopt = has_no_adopt = False
10148 for disk in self.op.disks:
10149 if self.op.disk_template != constants.DT_EXT:
10150 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10151 if constants.IDISK_ADOPT in disk:
10154 has_no_adopt = True
10155 if has_adopt and has_no_adopt:
10156 raise errors.OpPrereqError("Either all disks are adopted or none is",
10157 errors.ECODE_INVAL)
10159 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10160 raise errors.OpPrereqError("Disk adoption is not supported for the"
10161 " '%s' disk template" %
10162 self.op.disk_template,
10163 errors.ECODE_INVAL)
10164 if self.op.iallocator is not None:
10165 raise errors.OpPrereqError("Disk adoption not allowed with an"
10166 " iallocator script", errors.ECODE_INVAL)
10167 if self.op.mode == constants.INSTANCE_IMPORT:
10168 raise errors.OpPrereqError("Disk adoption not allowed for"
10169 " instance import", errors.ECODE_INVAL)
10171 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10172 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10173 " but no 'adopt' parameter given" %
10174 self.op.disk_template,
10175 errors.ECODE_INVAL)
10177 self.adopt_disks = has_adopt
10179 # instance name verification
10180 if self.op.name_check:
10181 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10182 self.op.instance_name = self.hostname1.name
10183 # used in CheckPrereq for ip ping check
10184 self.check_ip = self.hostname1.ip
10186 self.check_ip = None
10188 # file storage checks
10189 if (self.op.file_driver and
10190 self.op.file_driver not in constants.FILE_DRIVER):
10191 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10192 self.op.file_driver, errors.ECODE_INVAL)
10194 if self.op.disk_template == constants.DT_FILE:
10195 opcodes.RequireFileStorage()
10196 elif self.op.disk_template == constants.DT_SHARED_FILE:
10197 opcodes.RequireSharedFileStorage()
10199 ### Node/iallocator related checks
10200 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10202 if self.op.pnode is not None:
10203 if self.op.disk_template in constants.DTS_INT_MIRROR:
10204 if self.op.snode is None:
10205 raise errors.OpPrereqError("The networked disk templates need"
10206 " a mirror node", errors.ECODE_INVAL)
10207 elif self.op.snode:
10208 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10210 self.op.snode = None
10212 _CheckOpportunisticLocking(self.op)
10214 self._cds = _GetClusterDomainSecret()
10216 if self.op.mode == constants.INSTANCE_IMPORT:
10217 # On import force_variant must be True, because if we forced it at
10218 # initial install, our only chance when importing it back is that it works again.
10220 self.op.force_variant = True
10222 if self.op.no_install:
10223 self.LogInfo("No-installation mode has no effect during import")
10225 elif self.op.mode == constants.INSTANCE_CREATE:
10226 if self.op.os_type is None:
10227 raise errors.OpPrereqError("No guest OS specified",
10228 errors.ECODE_INVAL)
10229 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10230 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10231 " installation" % self.op.os_type,
10232 errors.ECODE_STATE)
10233 if self.op.disk_template is None:
10234 raise errors.OpPrereqError("No disk template specified",
10235 errors.ECODE_INVAL)
10237 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10238 # Check handshake to ensure both clusters have the same domain secret
10239 src_handshake = self.op.source_handshake
10240 if not src_handshake:
10241 raise errors.OpPrereqError("Missing source handshake",
10242 errors.ECODE_INVAL)
10244 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10247 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10248 errors.ECODE_INVAL)
10250 # Load and check source CA
10251 self.source_x509_ca_pem = self.op.source_x509_ca
10252 if not self.source_x509_ca_pem:
10253 raise errors.OpPrereqError("Missing source X509 CA",
10254 errors.ECODE_INVAL)
10257 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10259 except OpenSSL.crypto.Error, err:
10260 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10261 (err, ), errors.ECODE_INVAL)
10263 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10264 if errcode is not None:
10265 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10266 errors.ECODE_INVAL)
10268 self.source_x509_ca = cert
10270 src_instance_name = self.op.source_instance_name
10271 if not src_instance_name:
10272 raise errors.OpPrereqError("Missing source instance name",
10273 errors.ECODE_INVAL)
10275 self.source_instance_name = \
10276 netutils.GetHostname(name=src_instance_name).name
10279 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10280 self.op.mode, errors.ECODE_INVAL)
10282 def ExpandNames(self):
10283 """ExpandNames for CreateInstance.
10285 Figure out the right locks for instance creation.
10288 self.needed_locks = {}
10290 instance_name = self.op.instance_name
10291 # this is just a preventive check, but someone might still add this
10292 # instance in the meantime, and creation will fail at lock-add time
10293 if instance_name in self.cfg.GetInstanceList():
10294 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10295 instance_name, errors.ECODE_EXISTS)
10297 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10299 if self.op.iallocator:
10300 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10301 # specifying a group on instance creation and then selecting nodes from
10303 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10304 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10306 if self.op.opportunistic_locking:
10307 self.opportunistic_locks[locking.LEVEL_NODE] = True
10308 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10310 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10311 nodelist = [self.op.pnode]
10312 if self.op.snode is not None:
10313 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10314 nodelist.append(self.op.snode)
10315 self.needed_locks[locking.LEVEL_NODE] = nodelist
10317 # in case of import lock the source node too
10318 if self.op.mode == constants.INSTANCE_IMPORT:
10319 src_node = self.op.src_node
10320 src_path = self.op.src_path
10322 if src_path is None:
10323 self.op.src_path = src_path = self.op.instance_name
10325 if src_node is None:
10326 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10327 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10328 self.op.src_node = None
10329 if os.path.isabs(src_path):
10330 raise errors.OpPrereqError("Importing an instance from a path"
10331 " requires a source node option",
10332 errors.ECODE_INVAL)
10334 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10335 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10336 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10337 if not os.path.isabs(src_path):
10338 self.op.src_path = src_path = \
10339 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10341 self.needed_locks[locking.LEVEL_NODE_RES] = \
10342 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10344 def _RunAllocator(self):
10345 """Run the allocator based on input opcode.
10348 if self.op.opportunistic_locking:
10349 # Only consider nodes for which a lock is held
10350 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10352 node_whitelist = None
10354 #TODO Export network to iallocator so that it chooses a pnode
10355 # in a nodegroup that has the desired network connected to
10356 req = _CreateInstanceAllocRequest(self.op, self.disks,
10357 self.nics, self.be_full,
10359 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10361 ial.Run(self.op.iallocator)
10363 if not ial.success:
10364 # When opportunistic locks are used only a temporary failure is generated
10365 if self.op.opportunistic_locking:
10366 ecode = errors.ECODE_TEMP_NORES
10368 ecode = errors.ECODE_NORES
10370 raise errors.OpPrereqError("Can't compute nodes using"
10371 " iallocator '%s': %s" %
10372 (self.op.iallocator, ial.info),
10375 self.op.pnode = ial.result[0]
10376 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10377 self.op.instance_name, self.op.iallocator,
10378 utils.CommaJoin(ial.result))
10380 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10382 if req.RequiredNodes() == 2:
10383 self.op.snode = ial.result[1]
10385 def BuildHooksEnv(self):
10386 """Build hooks env.
10388 This runs on master, primary and secondary nodes of the instance.
10392 "ADD_MODE": self.op.mode,
10394 if self.op.mode == constants.INSTANCE_IMPORT:
10395 env["SRC_NODE"] = self.op.src_node
10396 env["SRC_PATH"] = self.op.src_path
10397 env["SRC_IMAGES"] = self.src_images
10399 env.update(_BuildInstanceHookEnv(
10400 name=self.op.instance_name,
10401 primary_node=self.op.pnode,
10402 secondary_nodes=self.secondaries,
10403 status=self.op.start,
10404 os_type=self.op.os_type,
10405 minmem=self.be_full[constants.BE_MINMEM],
10406 maxmem=self.be_full[constants.BE_MAXMEM],
10407 vcpus=self.be_full[constants.BE_VCPUS],
10408 nics=_NICListToTuple(self, self.nics),
10409 disk_template=self.op.disk_template,
10410 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10411 for d in self.disks],
10414 hypervisor_name=self.op.hypervisor,
10420 def BuildHooksNodes(self):
10421 """Build hooks nodes.
10424 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10427 def _ReadExportInfo(self):
10428 """Reads the export information from disk.
10430 It will override the opcode source node and path with the actual
10431 information, if these two were not specified before.
10433 @return: the export information
10436 assert self.op.mode == constants.INSTANCE_IMPORT
10438 src_node = self.op.src_node
10439 src_path = self.op.src_path
10441 if src_node is None:
10442 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10443 exp_list = self.rpc.call_export_list(locked_nodes)
10445 for node in exp_list:
10446 if exp_list[node].fail_msg:
10448 if src_path in exp_list[node].payload:
10450 self.op.src_node = src_node = node
10451 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10455 raise errors.OpPrereqError("No export found for relative path %s" %
10456 src_path, errors.ECODE_INVAL)
10458 _CheckNodeOnline(self, src_node)
10459 result = self.rpc.call_export_info(src_node, src_path)
10460 result.Raise("No export or invalid export found in dir %s" % src_path)
10462 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10463 if not export_info.has_section(constants.INISECT_EXP):
10464 raise errors.ProgrammerError("Corrupted export config",
10465 errors.ECODE_ENVIRON)
10467 ei_version = export_info.get(constants.INISECT_EXP, "version")
10468 if (int(ei_version) != constants.EXPORT_VERSION):
10469 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10470 (ei_version, constants.EXPORT_VERSION),
10471 errors.ECODE_ENVIRON)
10474 def _ReadExportParams(self, einfo):
10475 """Use export parameters as defaults.
10477 In case the opcode doesn't specify (as in override) some instance
10478 parameters, then try to use them from the export information, if
10479 that declares them.
10482 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10484 if self.op.disk_template is None:
10485 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10486 self.op.disk_template = einfo.get(constants.INISECT_INS,
10488 if self.op.disk_template not in constants.DISK_TEMPLATES:
10489 raise errors.OpPrereqError("Disk template specified in configuration"
10490 " file is not one of the allowed values:"
10492 " ".join(constants.DISK_TEMPLATES),
10493 errors.ECODE_INVAL)
10495 raise errors.OpPrereqError("No disk template specified and the export"
10496 " is missing the disk_template information",
10497 errors.ECODE_INVAL)
10499 if not self.op.disks:
10501 # TODO: import the disk iv_name too
10502 for idx in range(constants.MAX_DISKS):
10503 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10504 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10505 disks.append({constants.IDISK_SIZE: disk_sz})
10506 self.op.disks = disks
10507 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10508 raise errors.OpPrereqError("No disk info specified and the export"
10509 " is missing the disk information",
10510 errors.ECODE_INVAL)
10512 if not self.op.nics:
10514 for idx in range(constants.MAX_NICS):
10515 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10517 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10518 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10523 self.op.nics = nics
10525 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10526 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10528 if (self.op.hypervisor is None and
10529 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10530 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10532 if einfo.has_section(constants.INISECT_HYP):
10533 # use the export parameters but do not override the ones
10534 # specified by the user
10535 for name, value in einfo.items(constants.INISECT_HYP):
10536 if name not in self.op.hvparams:
10537 self.op.hvparams[name] = value
10539 if einfo.has_section(constants.INISECT_BEP):
10540 # use the parameters, without overriding
10541 for name, value in einfo.items(constants.INISECT_BEP):
10542 if name not in self.op.beparams:
10543 self.op.beparams[name] = value
10544 # Compatibility for the old "memory" be param
10545 if name == constants.BE_MEMORY:
10546 if constants.BE_MAXMEM not in self.op.beparams:
10547 self.op.beparams[constants.BE_MAXMEM] = value
10548 if constants.BE_MINMEM not in self.op.beparams:
10549 self.op.beparams[constants.BE_MINMEM] = value
10551 # try to read the parameters old style, from the main section
10552 for name in constants.BES_PARAMETERS:
10553 if (name not in self.op.beparams and
10554 einfo.has_option(constants.INISECT_INS, name)):
10555 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10557 if einfo.has_section(constants.INISECT_OSP):
10558 # use the parameters, without overriding
10559 for name, value in einfo.items(constants.INISECT_OSP):
10560 if name not in self.op.osparams:
10561 self.op.osparams[name] = value
10563 def _RevertToDefaults(self, cluster):
10564 """Revert the instance parameters to the default values.
10568 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10569 for name in self.op.hvparams.keys():
10570 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10571 del self.op.hvparams[name]
10573 be_defs = cluster.SimpleFillBE({})
10574 for name in self.op.beparams.keys():
10575 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10576 del self.op.beparams[name]
10578 nic_defs = cluster.SimpleFillNIC({})
10579 for nic in self.op.nics:
10580 for name in constants.NICS_PARAMETERS:
10581 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10584 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10585 for name in self.op.osparams.keys():
10586 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10587 del self.op.osparams[name]
10589 def _CalculateFileStorageDir(self):
10590 """Calculate final instance file storage dir.
10593 # file storage dir calculation/check
10594 self.instance_file_storage_dir = None
10595 if self.op.disk_template in constants.DTS_FILEBASED:
10596 # build the full file storage dir path
10599 if self.op.disk_template == constants.DT_SHARED_FILE:
10600 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10602 get_fsd_fn = self.cfg.GetFileStorageDir
10604 cfg_storagedir = get_fsd_fn()
10605 if not cfg_storagedir:
10606 raise errors.OpPrereqError("Cluster file storage dir not defined",
10607 errors.ECODE_STATE)
10608 joinargs.append(cfg_storagedir)
10610 if self.op.file_storage_dir is not None:
10611 joinargs.append(self.op.file_storage_dir)
10613 joinargs.append(self.op.instance_name)
10615 # pylint: disable=W0142
10616 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
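# Path construction sketch (hypothetical directories): with a cluster file
# storage dir of "/srv/ganeti/file-storage", op.file_storage_dir = "mydir"
# and instance name "web1", instance_file_storage_dir becomes
# "/srv/ganeti/file-storage/mydir/web1"; for DT_SHARED_FILE the shared file
# storage dir from the configuration is used as the base instead.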
10618 def CheckPrereq(self): # pylint: disable=R0914
10619 """Check prerequisites.
10622 self._CalculateFileStorageDir()
10624 if self.op.mode == constants.INSTANCE_IMPORT:
10625 export_info = self._ReadExportInfo()
10626 self._ReadExportParams(export_info)
10627 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10629 self._old_instance_name = None
10631 if (not self.cfg.GetVGName() and
10632 self.op.disk_template not in constants.DTS_NOT_LVM):
10633 raise errors.OpPrereqError("Cluster does not support lvm-based"
10634 " instances", errors.ECODE_STATE)
10636 if (self.op.hypervisor is None or
10637 self.op.hypervisor == constants.VALUE_AUTO):
10638 self.op.hypervisor = self.cfg.GetHypervisorType()
10640 cluster = self.cfg.GetClusterInfo()
10641 enabled_hvs = cluster.enabled_hypervisors
10642 if self.op.hypervisor not in enabled_hvs:
10643 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10645 (self.op.hypervisor, ",".join(enabled_hvs)),
10646 errors.ECODE_STATE)
10648 # Check tag validity
10649 for tag in self.op.tags:
10650 objects.TaggableObject.ValidateTag(tag)
10652 # check hypervisor parameter syntax (locally)
10653 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10654 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10656 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10657 hv_type.CheckParameterSyntax(filled_hvp)
10658 self.hv_full = filled_hvp
10659 # check that we don't specify global parameters on an instance
10660 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10661 "instance", "cluster")
10663 # fill and remember the beparams dict
10664 self.be_full = _ComputeFullBeParams(self.op, cluster)
10666 # build os parameters
10667 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10669 # now that hvp/bep are in final format, let's reset to defaults,
10671 if self.op.identify_defaults:
10672 self._RevertToDefaults(cluster)
10675 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10676 self.proc.GetECId())
10678 # disk checks/pre-build
10679 default_vg = self.cfg.GetVGName()
10680 self.disks = _ComputeDisks(self.op, default_vg)
10682 if self.op.mode == constants.INSTANCE_IMPORT:
10684 for idx in range(len(self.disks)):
10685 option = "disk%d_dump" % idx
10686 if export_info.has_option(constants.INISECT_INS, option):
10687 # FIXME: are the old os-es, disk sizes, etc. useful?
10688 export_name = export_info.get(constants.INISECT_INS, option)
10689 image = utils.PathJoin(self.op.src_path, export_name)
10690 disk_images.append(image)
10692 disk_images.append(False)
10694 self.src_images = disk_images
10696 if self.op.instance_name == self._old_instance_name:
10697 for idx, nic in enumerate(self.nics):
10698 if nic.mac == constants.VALUE_AUTO:
10699 nic_mac_ini = "nic%d_mac" % idx
10700 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10702 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10704 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10705 if self.op.ip_check:
10706 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10707 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10708 (self.check_ip, self.op.instance_name),
10709 errors.ECODE_NOTUNIQUE)
10711 #### mac address generation
10712 # By generating the MAC address here, both the allocator and the hooks get
10713 # the real final mac address rather than the 'auto' or 'generate' value.
10714 # There is a race condition between the generation and the instance object
10715 # creation, which means that we know the mac is valid now, but we're not
10716 # sure it will be when we actually add the instance. If things go bad
10717 # adding the instance will abort because of a duplicate mac, and the
10718 # creation job will fail.
10719 for nic in self.nics:
10720 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10721 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10725 if self.op.iallocator is not None:
10726 self._RunAllocator()
10728 # Release all unneeded node locks
10729 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10730 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10731 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10732 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10734 assert (self.owned_locks(locking.LEVEL_NODE) ==
10735 self.owned_locks(locking.LEVEL_NODE_RES)), \
10736 "Node locks differ from node resource locks"
10738 #### node related checks
10740 # check primary node
10741 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10742 assert self.pnode is not None, \
10743 "Cannot retrieve locked node %s" % self.op.pnode
10745 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10746 pnode.name, errors.ECODE_STATE)
10748 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10749 pnode.name, errors.ECODE_STATE)
10750 if not pnode.vm_capable:
10751 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10752 " '%s'" % pnode.name, errors.ECODE_STATE)
10754 self.secondaries = []
10756 # Fill in any IPs from IP pools. This must happen here, because we need to
10757 # know the nic's primary node, as specified by the iallocator
10758 for idx, nic in enumerate(self.nics):
10759 net_uuid = nic.network
10760 if net_uuid is not None:
10761 nobj = self.cfg.GetNetwork(net_uuid)
10762 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10763 if netparams is None:
10764 raise errors.OpPrereqError("No netparams found for network"
10765 " %s. Probably not connected to"
10766 " the nodegroup of node %s" %
10767 (nobj.name, self.pnode.name),
10768 errors.ECODE_INVAL)
10769 self.LogInfo("NIC/%d inherits netparams %s" %
10770 (idx, netparams.values()))
10771 nic.nicparams = dict(netparams)
10772 if nic.ip is not None:
10773 if nic.ip.lower() == constants.NIC_IP_POOL:
10775 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10776 except errors.ReservationError:
10777 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10778 " from the address pool" % idx,
10779 errors.ECODE_STATE)
10780 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10783 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10784 except errors.ReservationError:
10785 raise errors.OpPrereqError("IP address %s already in use"
10786 " or does not belong to network %s" %
10787 (nic.ip, nobj.name),
10788 errors.ECODE_NOTUNIQUE)
10790 # net is None, ip None or given
10791 elif self.op.conflicts_check:
10792 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10794 # mirror node verification
10795 if self.op.disk_template in constants.DTS_INT_MIRROR:
10796 if self.op.snode == pnode.name:
10797 raise errors.OpPrereqError("The secondary node cannot be the"
10798 " primary node", errors.ECODE_INVAL)
10799 _CheckNodeOnline(self, self.op.snode)
10800 _CheckNodeNotDrained(self, self.op.snode)
10801 _CheckNodeVmCapable(self, self.op.snode)
10802 self.secondaries.append(self.op.snode)
10804 snode = self.cfg.GetNodeInfo(self.op.snode)
10805 if pnode.group != snode.group:
10806 self.LogWarning("The primary and secondary nodes are in two"
10807 " different node groups; the disk parameters"
10808 " from the first disk's node group will be"
10811 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
10813 if self.op.disk_template in constants.DTS_INT_MIRROR:
10814 nodes.append(snode)
10815 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10816 if compat.any(map(has_es, nodes)):
10817 raise errors.OpPrereqError("Disk template %s not supported with"
10818 " exclusive storage" % self.op.disk_template,
10819 errors.ECODE_STATE)
10821 nodenames = [pnode.name] + self.secondaries
10823 if not self.adopt_disks:
10824 if self.op.disk_template == constants.DT_RBD:
10825 # _CheckRADOSFreeSpace() is just a placeholder.
10826 # Any function that checks prerequisites can be placed here.
10827 # Check if there is enough space on the RADOS cluster.
10828 _CheckRADOSFreeSpace()
10829 elif self.op.disk_template == constants.DT_EXT:
10830 # FIXME: Function that checks prereqs if needed
10833 # Check lv size requirements, if not adopting
10834 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10835 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10837 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10838 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10839 disk[constants.IDISK_ADOPT])
10840 for disk in self.disks])
10841 if len(all_lvs) != len(self.disks):
10842 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10843 errors.ECODE_INVAL)
10844 for lv_name in all_lvs:
10846 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10847 # to ReserveLV uses the same syntax
10848 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10849 except errors.ReservationError:
10850 raise errors.OpPrereqError("LV named %s used by another instance" %
10851 lv_name, errors.ECODE_NOTUNIQUE)
10853 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10854 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10856 node_lvs = self.rpc.call_lv_list([pnode.name],
10857 vg_names.payload.keys())[pnode.name]
10858 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10859 node_lvs = node_lvs.payload
10861 delta = all_lvs.difference(node_lvs.keys())
10863 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10864 utils.CommaJoin(delta),
10865 errors.ECODE_INVAL)
10866 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10868 raise errors.OpPrereqError("Online logical volumes found, cannot"
10869 " adopt: %s" % utils.CommaJoin(online_lvs),
10870 errors.ECODE_STATE)
10871 # update the size of disk based on what is found
10872 for dsk in self.disks:
10873 dsk[constants.IDISK_SIZE] = \
10874 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10875 dsk[constants.IDISK_ADOPT])][0]))
10877 elif self.op.disk_template == constants.DT_BLOCK:
10878 # Normalize and de-duplicate device paths
10879 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10880 for disk in self.disks])
10881 if len(all_disks) != len(self.disks):
10882 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10883 errors.ECODE_INVAL)
10884 baddisks = [d for d in all_disks
10885 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10887 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10888 " cannot be adopted" %
10889 (utils.CommaJoin(baddisks),
10890 constants.ADOPTABLE_BLOCKDEV_ROOT),
10891 errors.ECODE_INVAL)
10893 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10894 list(all_disks))[pnode.name]
10895 node_disks.Raise("Cannot get block device information from node %s" %
10897 node_disks = node_disks.payload
10898 delta = all_disks.difference(node_disks.keys())
10900 raise errors.OpPrereqError("Missing block device(s): %s" %
10901 utils.CommaJoin(delta),
10902 errors.ECODE_INVAL)
10903 for dsk in self.disks:
10904 dsk[constants.IDISK_SIZE] = \
10905 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10907 # Verify instance specs
10908 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10910 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10911 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10912 constants.ISPEC_DISK_COUNT: len(self.disks),
10913 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10914 for disk in self.disks],
10915 constants.ISPEC_NIC_COUNT: len(self.nics),
10916 constants.ISPEC_SPINDLE_USE: spindle_use,
10919 group_info = self.cfg.GetNodeGroup(pnode.group)
10920 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10921 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
10922 self.op.disk_template)
10923 if not self.op.ignore_ipolicy and res:
10924 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10925 (pnode.group, group_info.name, utils.CommaJoin(res)))
10926 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10928 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10930 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10931 # check OS parameters (remotely)
10932 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10934 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10936 #TODO: _CheckExtParams (remotely)
10937 # Check parameters for extstorage
10939 # memory check on primary node
10940 #TODO(dynmem): use MINMEM for checking
10942 _CheckNodeFreeMemory(self, self.pnode.name,
10943 "creating instance %s" % self.op.instance_name,
10944 self.be_full[constants.BE_MAXMEM],
10945 self.op.hypervisor)
10947 self.dry_run_result = list(nodenames)
10949 def Exec(self, feedback_fn):
10950 """Create and add the instance to the cluster.
10953 instance = self.op.instance_name
10954 pnode_name = self.pnode.name
10956 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10957 self.owned_locks(locking.LEVEL_NODE)), \
10958 "Node locks differ from node resource locks"
10959 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
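10960 # Reserve a network (console) port from the cluster pool for hypervisors that need one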
10961 ht_kind = self.op.hypervisor
10962 if ht_kind in constants.HTS_REQ_PORT:
10963 network_port = self.cfg.AllocatePort()
10964 else:
10965 network_port = None
10967 # This is ugly, but we have a chicken-and-egg problem here:
10968 # We can only take the group disk parameters, as the instance
10969 # has no disks yet (we are generating them right here).
10970 node = self.cfg.GetNodeInfo(pnode_name)
10971 nodegroup = self.cfg.GetNodeGroup(node.group)
10972 disks = _GenerateDiskTemplate(self,
10973 self.op.disk_template,
10974 instance, pnode_name,
10977 self.instance_file_storage_dir,
10978 self.op.file_driver,
10981 self.cfg.GetGroupDiskParams(nodegroup))
10983 iobj = objects.Instance(name=instance, os=self.op.os_type,
10984 primary_node=pnode_name,
10985 nics=self.nics, disks=disks,
10986 disk_template=self.op.disk_template,
10987 admin_state=constants.ADMINST_DOWN,
10988 network_port=network_port,
10989 beparams=self.op.beparams,
10990 hvparams=self.op.hvparams,
10991 hypervisor=self.op.hypervisor,
10992 osparams=self.op.osparams,
10993 )
10995 if self.op.tags:
10996 for tag in self.op.tags:
10997 iobj.AddTag(tag)
10999 if self.adopt_disks:
11000 if self.op.disk_template == constants.DT_PLAIN:
11001 # rename LVs to the newly-generated names; we need to construct
11002 # 'fake' LV disks with the old data, plus the new unique_id
11003 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
11004 rename_to = []
11005 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
11006 rename_to.append(t_dsk.logical_id)
11007 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
11008 self.cfg.SetDiskID(t_dsk, pnode_name)
11009 result = self.rpc.call_blockdev_rename(pnode_name,
11010 zip(tmp_disks, rename_to))
11011 result.Raise("Failed to rename adopted LVs")
11013 feedback_fn("* creating instance disks...")
11014 try:
11015 _CreateDisks(self, iobj)
11016 except errors.OpExecError:
11017 self.LogWarning("Device creation failed")
11018 self.cfg.ReleaseDRBDMinors(instance)
11019 raise
11021 feedback_fn("adding instance %s to cluster config" % instance)
11023 self.cfg.AddInstance(iobj, self.proc.GetECId())
11025 # Declare that we don't want to remove the instance lock anymore, as we've
11026 # added the instance to the config
11027 del self.remove_locks[locking.LEVEL_INSTANCE]
11029 if self.op.mode == constants.INSTANCE_IMPORT:
11030 # Release unused nodes
11031 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
11033 # Release all nodes
11034 _ReleaseLocks(self, locking.LEVEL_NODE)
11037 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11038 feedback_fn("* wiping instance disks...")
11039 try:
11040 _WipeDisks(self, iobj)
11041 except errors.OpExecError, err:
11042 logging.exception("Wiping disks failed")
11043 self.LogWarning("Wiping instance disks failed (%s)", err)
11047 # Something is already wrong with the disks, don't do anything else
11049 elif self.op.wait_for_sync:
11050 disk_abort = not _WaitForSync(self, iobj)
11051 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11052 # make sure the disks are not degraded (still sync-ing is ok)
11053 feedback_fn("* checking mirrors status")
11054 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11059 _RemoveDisks(self, iobj)
11060 self.cfg.RemoveInstance(iobj.name)
11061 # Make sure the instance lock gets removed
11062 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11063 raise errors.OpExecError("There are some degraded disks for"
11064 " this instance")
11066 # Release all node resource locks
11067 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11069 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11070 # we need to set the disks ID to the primary node, since the
11071 # preceding code might or might not have done it, depending on
11072 # disk template and other options
11073 for disk in iobj.disks:
11074 self.cfg.SetDiskID(disk, pnode_name)
11075 if self.op.mode == constants.INSTANCE_CREATE:
11076 if not self.op.no_install:
11077 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11078 not self.op.wait_for_sync)
11080 feedback_fn("* pausing disk sync to install instance OS")
11081 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11084 for idx, success in enumerate(result.payload):
11085 if not success:
11086 logging.warn("pause-sync of instance %s for disk %d failed",
11087 instance, idx)
11089 feedback_fn("* running the instance OS create scripts...")
11090 # FIXME: pass debug option from opcode to backend
11091 os_add_result = \
11092 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11093 self.op.debug_level)
11095 feedback_fn("* resuming disk sync")
11096 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11099 for idx, success in enumerate(result.payload):
11100 if not success:
11101 logging.warn("resume-sync of instance %s for disk %d failed",
11102 instance, idx)
11104 os_add_result.Raise("Could not add os for instance %s"
11105 " on node %s" % (instance, pnode_name))
11108 if self.op.mode == constants.INSTANCE_IMPORT:
11109 feedback_fn("* running the instance OS import scripts...")
11111 transfers = []
11113 for idx, image in enumerate(self.src_images):
11117 # FIXME: pass debug option from opcode to backend
11118 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11119 constants.IEIO_FILE, (image, ),
11120 constants.IEIO_SCRIPT,
11121 (iobj.disks[idx], idx),
11123 transfers.append(dt)
11125 import_result = \
11126 masterd.instance.TransferInstanceData(self, feedback_fn,
11127 self.op.src_node, pnode_name,
11128 self.pnode.secondary_ip,
11130 if not compat.all(import_result):
11131 self.LogWarning("Some disks for instance %s on node %s were not"
11132 " imported successfully" % (instance, pnode_name))
11134 rename_from = self._old_instance_name
11136 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11137 feedback_fn("* preparing remote import...")
11138 # The source cluster will stop the instance before attempting to make
11139 # a connection. In some cases stopping an instance can take a long
11140 # time, hence the shutdown timeout is added to the connection
11141 # timeout.
11142 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11143 self.op.source_shutdown_timeout)
11144 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11146 assert iobj.primary_node == self.pnode.name
11147 disk_results = \
11148 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11149 self.source_x509_ca,
11150 self._cds, timeouts)
11151 if not compat.all(disk_results):
11152 # TODO: Should the instance still be started, even if some disks
11153 # failed to import (valid for local imports, too)?
11154 self.LogWarning("Some disks for instance %s on node %s were not"
11155 " imported successfully" % (instance, pnode_name))
11157 rename_from = self.source_instance_name
11159 else:
11160 # also checked in the prereq part
11161 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11162 % self.op.mode)
11164 # Run rename script on newly imported instance
11165 assert iobj.name == instance
11166 feedback_fn("Running rename script for %s" % instance)
11167 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11168 rename_from,
11169 self.op.debug_level)
11170 if result.fail_msg:
11171 self.LogWarning("Failed to run rename script for %s on node"
11172 " %s: %s" % (instance, pnode_name, result.fail_msg))
11174 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11176 if self.op.start:
11177 iobj.admin_state = constants.ADMINST_UP
11178 self.cfg.Update(iobj, feedback_fn)
11179 logging.info("Starting instance %s on node %s", instance, pnode_name)
11180 feedback_fn("* starting instance...")
11181 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11183 result.Raise("Could not start instance")
11185 return list(iobj.all_nodes)
11188 class LUInstanceMultiAlloc(NoHooksLU):
11189 """Allocates multiple instances at the same time.
11194 def CheckArguments(self):
11195 """Check arguments.
11199 for inst in self.op.instances:
11200 if inst.iallocator is not None:
11201 raise errors.OpPrereqError("iallocator is not allowed to be set on"
11202 " instance objects", errors.ECODE_INVAL)
11203 nodes.append(bool(inst.pnode))
11204 if inst.disk_template in constants.DTS_INT_MIRROR:
11205 nodes.append(bool(inst.snode))
11207 has_nodes = compat.any(nodes)
11208 if compat.all(nodes) ^ has_nodes:
11209 raise errors.OpPrereqError("There are instance objects providing"
11210 " pnode/snode while others do not",
11211 errors.ECODE_INVAL)
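11212 # Fall back to the cluster-wide default iallocator unless nodes were given explicitly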
11213 if self.op.iallocator is None:
11214 default_iallocator = self.cfg.GetDefaultIAllocator()
11215 if default_iallocator and has_nodes:
11216 self.op.iallocator = default_iallocator
11218 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11219 " given and no cluster-wide default"
11220 " iallocator found; please specify either"
11221 " an iallocator or nodes on the instances"
11222 " or set a cluster-wide default iallocator",
11223 errors.ECODE_INVAL)
11225 _CheckOpportunisticLocking(self.op)
11227 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11228 if dups:
11229 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11230 utils.CommaJoin(dups), errors.ECODE_INVAL)
11232 def ExpandNames(self):
11233 """Calculate the locks.
11236 self.share_locks = _ShareAll()
11237 self.needed_locks = {
11238 # iallocator will select nodes and even if no iallocator is used,
11239 # collisions with LUInstanceCreate should be avoided
11240 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11243 if self.op.iallocator:
11244 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11245 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11247 if self.op.opportunistic_locking:
11248 self.opportunistic_locks[locking.LEVEL_NODE] = True
11249 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11250 else:
11251 nodeslist = []
11252 for inst in self.op.instances:
11253 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11254 nodeslist.append(inst.pnode)
11255 if inst.snode is not None:
11256 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11257 nodeslist.append(inst.snode)
11259 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11260 # Lock resources of instance's primary and secondary nodes (copy to
11261 # prevent accidental modification)
11262 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11264 def CheckPrereq(self):
11265 """Check prerequisite.
11268 cluster = self.cfg.GetClusterInfo()
11269 default_vg = self.cfg.GetVGName()
11270 ec_id = self.proc.GetECId()
11272 if self.op.opportunistic_locking:
11273 # Only consider nodes for which a lock is held
11274 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11275 else:
11276 node_whitelist = None
11278 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11279 _ComputeNics(op, cluster, None,
11281 _ComputeFullBeParams(op, cluster),
11283 for op in self.op.instances]
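11284 # All requested instances are placed in a single multi-allocation iallocator request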
11285 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11286 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11288 ial.Run(self.op.iallocator)
11290 if not ial.success:
11291 raise errors.OpPrereqError("Can't compute nodes using"
11292 " iallocator '%s': %s" %
11293 (self.op.iallocator, ial.info),
11294 errors.ECODE_NORES)
11296 self.ia_result = ial.result
11298 if self.op.dry_run:
11299 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11300 constants.JOB_IDS_KEY: [],
11303 def _ConstructPartialResult(self):
11304 """Constructs the partial result.
11307 (allocatable, failed) = self.ia_result
11308 return {
11309 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11310 map(compat.fst, allocatable),
11311 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11312 }
11314 def Exec(self, feedback_fn):
11315 """Executes the opcode.
11318 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11319 (allocatable, failed) = self.ia_result
11321 jobs = []
11322 for (name, nodes) in allocatable:
11323 op = op2inst.pop(name)
11326 (op.pnode, op.snode) = nodes
11328 (op.pnode,) = nodes
11330 jobs.append([op])
11332 missing = set(op2inst.keys()) - set(failed)
11333 assert not missing, \
11334 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11336 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11339 def _CheckRADOSFreeSpace():
11340 """Compute disk size requirements inside the RADOS cluster.
11343 # For the RADOS cluster we assume there is always enough space.
11347 class LUInstanceConsole(NoHooksLU):
11348 """Connect to an instance's console.
11350 This is somewhat special in that it returns the command line that
11351 you need to run on the master node in order to connect to the console.
11357 def ExpandNames(self):
11358 self.share_locks = _ShareAll()
11359 self._ExpandAndLockInstance()
11361 def CheckPrereq(self):
11362 """Check prerequisites.
11364 This checks that the instance is in the cluster.
11367 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11368 assert self.instance is not None, \
11369 "Cannot retrieve locked instance %s" % self.op.instance_name
11370 _CheckNodeOnline(self, self.instance.primary_node)
11372 def Exec(self, feedback_fn):
11373 """Connect to the console of an instance
11376 instance = self.instance
11377 node = instance.primary_node
11379 node_insts = self.rpc.call_instance_list([node],
11380 [instance.hypervisor])[node]
11381 node_insts.Raise("Can't get node information from %s" % node)
11383 if instance.name not in node_insts.payload:
11384 if instance.admin_state == constants.ADMINST_UP:
11385 state = constants.INSTST_ERRORDOWN
11386 elif instance.admin_state == constants.ADMINST_DOWN:
11387 state = constants.INSTST_ADMINDOWN
11388 else:
11389 state = constants.INSTST_ADMINOFFLINE
11390 raise errors.OpExecError("Instance %s is not running (state %s)" %
11391 (instance.name, state))
11393 logging.debug("Connecting to console of %s on %s", instance.name, node)
11395 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11398 def _GetInstanceConsole(cluster, instance):
11399 """Returns console information for an instance.
11401 @type cluster: L{objects.Cluster}
11402 @type instance: L{objects.Instance}
11406 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11407 # beparams and hvparams are passed separately, to avoid editing the
11408 # instance and then saving the defaults in the instance itself.
11409 hvparams = cluster.FillHV(instance)
11410 beparams = cluster.FillBE(instance)
11411 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11413 assert console.instance == instance.name
11414 assert console.Validate()
11416 return console.ToDict()
11419 class LUInstanceReplaceDisks(LogicalUnit):
11420 """Replace the disks of an instance.
11423 HPATH = "mirrors-replace"
11424 HTYPE = constants.HTYPE_INSTANCE
11427 def CheckArguments(self):
11428 """Check arguments.
11431 remote_node = self.op.remote_node
11432 ialloc = self.op.iallocator
11433 if self.op.mode == constants.REPLACE_DISK_CHG:
11434 if remote_node is None and ialloc is None:
11435 raise errors.OpPrereqError("When changing the secondary either an"
11436 " iallocator script must be used or the"
11437 " new node given", errors.ECODE_INVAL)
11439 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11441 elif remote_node is not None or ialloc is not None:
11442 # Not replacing the secondary
11443 raise errors.OpPrereqError("The iallocator and new node options can"
11444 " only be used when changing the"
11445 " secondary node", errors.ECODE_INVAL)
11447 def ExpandNames(self):
11448 self._ExpandAndLockInstance()
11450 assert locking.LEVEL_NODE not in self.needed_locks
11451 assert locking.LEVEL_NODE_RES not in self.needed_locks
11452 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11454 assert self.op.iallocator is None or self.op.remote_node is None, \
11455 "Conflicting options"
11457 if self.op.remote_node is not None:
11458 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11460 # Warning: do not remove the locking of the new secondary here
11461 # unless DRBD8.AddChildren is changed to work in parallel;
11462 # currently it doesn't since parallel invocations of
11463 # FindUnusedMinor will conflict
11464 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11465 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11467 self.needed_locks[locking.LEVEL_NODE] = []
11468 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11470 if self.op.iallocator is not None:
11471 # iallocator will select a new node in the same group
11472 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11473 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11475 self.needed_locks[locking.LEVEL_NODE_RES] = []
11477 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11478 self.op.iallocator, self.op.remote_node,
11479 self.op.disks, self.op.early_release,
11480 self.op.ignore_ipolicy)
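11481 # The actual disk replacement work is delegated to the TLReplaceDisks tasklet below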
11482 self.tasklets = [self.replacer]
11484 def DeclareLocks(self, level):
11485 if level == locking.LEVEL_NODEGROUP:
11486 assert self.op.remote_node is None
11487 assert self.op.iallocator is not None
11488 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11490 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11491 # Lock all groups used by instance optimistically; this requires going
11492 # via the node before it's locked, requiring verification later on
11493 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11494 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11496 elif level == locking.LEVEL_NODE:
11497 if self.op.iallocator is not None:
11498 assert self.op.remote_node is None
11499 assert not self.needed_locks[locking.LEVEL_NODE]
11500 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11502 # Lock member nodes of all locked groups
11503 self.needed_locks[locking.LEVEL_NODE] = \
11504 [node_name
11505 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11506 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11507 else:
11508 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11510 self._LockInstancesNodes()
11512 elif level == locking.LEVEL_NODE_RES:
11514 self.needed_locks[locking.LEVEL_NODE_RES] = \
11515 self.needed_locks[locking.LEVEL_NODE]
11517 def BuildHooksEnv(self):
11518 """Build hooks env.
11520 This runs on the master, the primary and all the secondaries.
11523 instance = self.replacer.instance
11524 env = {
11525 "MODE": self.op.mode,
11526 "NEW_SECONDARY": self.op.remote_node,
11527 "OLD_SECONDARY": instance.secondary_nodes[0],
11528 }
11529 env.update(_BuildInstanceHookEnvByObject(self, instance))
11530 return env
11532 def BuildHooksNodes(self):
11533 """Build hooks nodes.
11536 instance = self.replacer.instance
11537 nl = [
11538 self.cfg.GetMasterNode(),
11539 instance.primary_node,
11540 ]
11541 if self.op.remote_node is not None:
11542 nl.append(self.op.remote_node)
11543 return nl, nl
11545 def CheckPrereq(self):
11546 """Check prerequisites.
11549 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11550 self.op.iallocator is None)
11552 # Verify if node group locks are still correct
11553 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11554 if owned_groups:
11555 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11557 return LogicalUnit.CheckPrereq(self)
11560 class TLReplaceDisks(Tasklet):
11561 """Replaces disks for an instance.
11563 Note: Locking is not within the scope of this class.
11566 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11567 disks, early_release, ignore_ipolicy):
11568 """Initializes this class.
11571 Tasklet.__init__(self, lu)
11574 self.instance_name = instance_name
11576 self.iallocator_name = iallocator_name
11577 self.remote_node = remote_node
11579 self.early_release = early_release
11580 self.ignore_ipolicy = ignore_ipolicy
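11581 # Runtime data, filled in by CheckPrereq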
11583 self.instance = None
11584 self.new_node = None
11585 self.target_node = None
11586 self.other_node = None
11587 self.remote_node_info = None
11588 self.node_secondary_ip = None
11590 @staticmethod
11591 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11592 """Compute a new secondary node using an IAllocator.
11595 req = iallocator.IAReqRelocate(name=instance_name,
11596 relocate_from=list(relocate_from))
11597 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11599 ial.Run(iallocator_name)
11601 if not ial.success:
11602 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11603 " %s" % (iallocator_name, ial.info),
11604 errors.ECODE_NORES)
11606 remote_node_name = ial.result[0]
11608 lu.LogInfo("Selected new secondary for instance '%s': %s",
11609 instance_name, remote_node_name)
11611 return remote_node_name
11613 def _FindFaultyDisks(self, node_name):
11614 """Wrapper for L{_FindFaultyInstanceDisks}.
11617 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11620 def _CheckDisksActivated(self, instance):
11621 """Checks if the instance disks are activated.
11623 @param instance: The instance to check disks
11624 @return: True if they are activated, False otherwise
11627 nodes = instance.all_nodes
11629 for idx, dev in enumerate(instance.disks):
11630 for node in nodes:
11631 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11632 self.cfg.SetDiskID(dev, node)
11634 result = _BlockdevFind(self, node, dev, instance)
11636 if result.offline:
11637 continue
11638 elif result.fail_msg or not result.payload:
11639 return False
11641 return True
11643 def CheckPrereq(self):
11644 """Check prerequisites.
11646 This checks that the instance is in the cluster.
11649 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11650 assert instance is not None, \
11651 "Cannot retrieve locked instance %s" % self.instance_name
11653 if instance.disk_template != constants.DT_DRBD8:
11654 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11655 " instances", errors.ECODE_INVAL)
11657 if len(instance.secondary_nodes) != 1:
11658 raise errors.OpPrereqError("The instance has a strange layout,"
11659 " expected one secondary but found %d" %
11660 len(instance.secondary_nodes),
11661 errors.ECODE_FAULT)
11663 instance = self.instance
11664 secondary_node = instance.secondary_nodes[0]
11666 if self.iallocator_name is None:
11667 remote_node = self.remote_node
11668 else:
11669 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11670 instance.name, instance.secondary_nodes)
11672 if remote_node is None:
11673 self.remote_node_info = None
11674 else:
11675 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11676 "Remote node '%s' is not locked" % remote_node
11678 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11679 assert self.remote_node_info is not None, \
11680 "Cannot retrieve locked node %s" % remote_node
11682 if remote_node == self.instance.primary_node:
11683 raise errors.OpPrereqError("The specified node is the primary node of"
11684 " the instance", errors.ECODE_INVAL)
11686 if remote_node == secondary_node:
11687 raise errors.OpPrereqError("The specified node is already the"
11688 " secondary node of the instance",
11689 errors.ECODE_INVAL)
11691 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11692 constants.REPLACE_DISK_CHG):
11693 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11694 errors.ECODE_INVAL)
11696 if self.mode == constants.REPLACE_DISK_AUTO:
11697 if not self._CheckDisksActivated(instance):
11698 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11699 " first" % self.instance_name,
11700 errors.ECODE_STATE)
11701 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11702 faulty_secondary = self._FindFaultyDisks(secondary_node)
11704 if faulty_primary and faulty_secondary:
11705 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11706 " one node and can not be repaired"
11707 " automatically" % self.instance_name,
11708 errors.ECODE_STATE)
11710 if faulty_primary:
11711 self.disks = faulty_primary
11712 self.target_node = instance.primary_node
11713 self.other_node = secondary_node
11714 check_nodes = [self.target_node, self.other_node]
11715 elif faulty_secondary:
11716 self.disks = faulty_secondary
11717 self.target_node = secondary_node
11718 self.other_node = instance.primary_node
11719 check_nodes = [self.target_node, self.other_node]
11725 # Non-automatic modes
11726 if self.mode == constants.REPLACE_DISK_PRI:
11727 self.target_node = instance.primary_node
11728 self.other_node = secondary_node
11729 check_nodes = [self.target_node, self.other_node]
11731 elif self.mode == constants.REPLACE_DISK_SEC:
11732 self.target_node = secondary_node
11733 self.other_node = instance.primary_node
11734 check_nodes = [self.target_node, self.other_node]
11736 elif self.mode == constants.REPLACE_DISK_CHG:
11737 self.new_node = remote_node
11738 self.other_node = instance.primary_node
11739 self.target_node = secondary_node
11740 check_nodes = [self.new_node, self.other_node]
11742 _CheckNodeNotDrained(self.lu, remote_node)
11743 _CheckNodeVmCapable(self.lu, remote_node)
11745 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11746 assert old_node_info is not None
11747 if old_node_info.offline and not self.early_release:
11748 # doesn't make sense to delay the release
11749 self.early_release = True
11750 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11751 " early-release mode", secondary_node)
11753 else:
11754 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11755 self.mode)
11757 # If not specified all disks should be replaced
11758 if not self.disks:
11759 self.disks = range(len(self.instance.disks))
11761 # TODO: This is ugly, but right now we can't distinguish between internally
11762 # submitted opcodes and external ones. We should fix that.
11763 if self.remote_node_info:
11764 # We change the node; let's verify it still meets the instance policy
11765 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11766 cluster = self.cfg.GetClusterInfo()
11767 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11769 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11770 self.cfg, ignore=self.ignore_ipolicy)
11772 for node in check_nodes:
11773 _CheckNodeOnline(self.lu, node)
11775 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11776 self.other_node,
11777 self.target_node]
11778 if node_name is not None)
11780 # Release unneeded node and node resource locks
11781 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11782 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11783 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11785 # Release any owned node group
11786 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11788 # Check whether disks are valid
11789 for disk_idx in self.disks:
11790 instance.FindDisk(disk_idx)
11792 # Get secondary node IP addresses
11793 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11794 in self.cfg.GetMultiNodeInfo(touched_nodes))
11796 def Exec(self, feedback_fn):
11797 """Execute disk replacement.
11799 This dispatches the disk replacement to the appropriate handler.
11803 # Verify owned locks before starting operation
11804 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11805 assert set(owned_nodes) == set(self.node_secondary_ip), \
11806 ("Incorrect node locks, owning %s, expected %s" %
11807 (owned_nodes, self.node_secondary_ip.keys()))
11808 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11809 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11810 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11812 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11813 assert list(owned_instances) == [self.instance_name], \
11814 "Instance '%s' not locked" % self.instance_name
11816 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11817 "Should not own any node group lock at this point"
11820 feedback_fn("No disks need replacement for instance '%s'" %
11821 self.instance.name)
11824 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11825 (utils.CommaJoin(self.disks), self.instance.name))
11826 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11827 feedback_fn("Current secondary node: %s" %
11828 utils.CommaJoin(self.instance.secondary_nodes))
11830 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11832 # Activate the instance disks if we're replacing them on a down instance
11833 if activate_disks:
11834 _StartInstanceDisks(self.lu, self.instance, True)
11837 # Should we replace the secondary node?
11838 if self.new_node is not None:
11839 fn = self._ExecDrbd8Secondary
11840 else:
11841 fn = self._ExecDrbd8DiskOnly
11843 result = fn(feedback_fn)
11845 # Deactivate the instance disks if we're replacing them on a
11846 # down instance
11847 if activate_disks:
11848 _SafeShutdownInstanceDisks(self.lu, self.instance)
11850 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11853 # Verify owned locks
11854 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11855 nodes = frozenset(self.node_secondary_ip)
11856 assert ((self.early_release and not owned_nodes) or
11857 (not self.early_release and not (set(owned_nodes) - nodes))), \
11858 ("Not owning the correct locks, early_release=%s, owned=%r,"
11859 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11863 def _CheckVolumeGroup(self, nodes):
11864 self.lu.LogInfo("Checking volume groups")
11866 vgname = self.cfg.GetVGName()
11868 # Make sure volume group exists on all involved nodes
11869 results = self.rpc.call_vg_list(nodes)
11870 if not results:
11871 raise errors.OpExecError("Can't list volume groups on the nodes")
11873 for node in nodes:
11874 res = results[node]
11875 res.Raise("Error checking node %s" % node)
11876 if vgname not in res.payload:
11877 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11878 (vgname, node))
11880 def _CheckDisksExistence(self, nodes):
11881 # Check disk existence
11882 for idx, dev in enumerate(self.instance.disks):
11883 if idx not in self.disks:
11884 continue
11886 for node in nodes:
11887 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11888 self.cfg.SetDiskID(dev, node)
11890 result = _BlockdevFind(self, node, dev, self.instance)
11892 msg = result.fail_msg
11893 if msg or not result.payload:
11894 if not msg:
11895 msg = "disk not found"
11896 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11897 (idx, node, msg))
11899 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11900 for idx, dev in enumerate(self.instance.disks):
11901 if idx not in self.disks:
11902 continue
11904 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11905 (idx, node_name))
11907 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11908 on_primary, ldisk=ldisk):
11909 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11910 " replace disks for instance %s" %
11911 (node_name, self.instance.name))
11913 def _CreateNewStorage(self, node_name):
11914 """Create new storage on the primary or secondary node.
11916 This is only used for same-node replaces, not for changing the
11917 secondary node, hence we don't want to modify the existing disk.
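11919 # iv_names maps each device's iv_name to (device, old LVs, new LVs) so the
11919 # caller can later verify the devices and remove the replaced storage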
11920 iv_names = {}
11922 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11923 for idx, dev in enumerate(disks):
11924 if idx not in self.disks:
11925 continue
11927 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11929 self.cfg.SetDiskID(dev, node_name)
11931 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11932 names = _GenerateUniqueNames(self.lu, lv_names)
11934 (data_disk, meta_disk) = dev.children
11935 vg_data = data_disk.logical_id[0]
11936 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11937 logical_id=(vg_data, names[0]),
11938 params=data_disk.params)
11939 vg_meta = meta_disk.logical_id[0]
11940 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11941 size=constants.DRBD_META_SIZE,
11942 logical_id=(vg_meta, names[1]),
11943 params=meta_disk.params)
11945 new_lvs = [lv_data, lv_meta]
11946 old_lvs = [child.Copy() for child in dev.children]
11947 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11948 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11950 # we pass force_create=True to force the LVM creation
11951 for new_lv in new_lvs:
11952 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11953 _GetInstanceInfoText(self.instance), False,
11954 excl_stor)
11956 return iv_names
11958 def _CheckDevices(self, node_name, iv_names):
11959 for name, (dev, _, _) in iv_names.iteritems():
11960 self.cfg.SetDiskID(dev, node_name)
11962 result = _BlockdevFind(self, node_name, dev, self.instance)
11964 msg = result.fail_msg
11965 if msg or not result.payload:
11966 if not msg:
11967 msg = "disk not found"
11968 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11969 (name, msg))
11971 if result.payload.is_degraded:
11972 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11974 def _RemoveOldStorage(self, node_name, iv_names):
11975 for name, (_, old_lvs, _) in iv_names.iteritems():
11976 self.lu.LogInfo("Remove logical volumes for %s", name)
11978 for lv in old_lvs:
11979 self.cfg.SetDiskID(lv, node_name)
11981 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11982 if msg:
11983 self.lu.LogWarning("Can't remove old LV: %s", msg,
11984 hint="remove unused LVs manually")
11986 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11987 """Replace a disk on the primary or secondary for DRBD 8.
11989 The algorithm for replace is quite complicated:
11991 1. for each disk to be replaced:
11993 1. create new LVs on the target node with unique names
11994 1. detach old LVs from the drbd device
11995 1. rename old LVs to name_replaced.<time_t>
11996 1. rename new LVs to old LVs
11997 1. attach the new LVs (with the old names now) to the drbd device
11999 1. wait for sync across all devices
12001 1. for each modified disk:
12003 1. remove old LVs (which have the name name_replaced.<time_t>)
12005 Failures are not very well handled.
12010 # Step: check device activation
12011 self.lu.LogStep(1, steps_total, "Check device existence")
12012 self._CheckDisksExistence([self.other_node, self.target_node])
12013 self._CheckVolumeGroup([self.target_node, self.other_node])
12015 # Step: check other node consistency
12016 self.lu.LogStep(2, steps_total, "Check peer consistency")
12017 self._CheckDisksConsistency(self.other_node,
12018 self.other_node == self.instance.primary_node,
12021 # Step: create new storage
12022 self.lu.LogStep(3, steps_total, "Allocate new storage")
12023 iv_names = self._CreateNewStorage(self.target_node)
12025 # Step: for each lv, detach+rename*2+attach
12026 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12027 for dev, old_lvs, new_lvs in iv_names.itervalues():
12028 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
12030 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
12032 result.Raise("Can't detach drbd from local storage on node"
12033 " %s for device %s" % (self.target_node, dev.iv_name))
12035 #cfg.Update(instance)
12037 # ok, we created the new LVs, so now we know we have the needed
12038 # storage; as such, we proceed on the target node to rename
12039 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12040 # using the assumption that logical_id == physical_id (which in
12041 # turn is the unique_id on that node)
12043 # FIXME(iustin): use a better name for the replaced LVs
12044 temp_suffix = int(time.time())
12045 ren_fn = lambda d, suff: (d.physical_id[0],
12046 d.physical_id[1] + "_replaced-%s" % suff)
12048 # Build the rename list based on what LVs exist on the node
12049 rename_old_to_new = []
12050 for to_ren in old_lvs:
12051 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12052 if not result.fail_msg and result.payload:
12054 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12056 self.lu.LogInfo("Renaming the old LVs on the target node")
12057 result = self.rpc.call_blockdev_rename(self.target_node,
12059 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12061 # Now we rename the new LVs to the old LVs
12062 self.lu.LogInfo("Renaming the new LVs on the target node")
12063 rename_new_to_old = [(new, old.physical_id)
12064 for old, new in zip(old_lvs, new_lvs)]
12065 result = self.rpc.call_blockdev_rename(self.target_node,
12067 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12069 # Intermediate steps of in memory modifications
12070 for old, new in zip(old_lvs, new_lvs):
12071 new.logical_id = old.logical_id
12072 self.cfg.SetDiskID(new, self.target_node)
12074 # We need to modify old_lvs so that removal later removes the
12075 # right LVs, not the newly added ones; note that old_lvs is a
12076 # copy here
12077 for disk in old_lvs:
12078 disk.logical_id = ren_fn(disk, temp_suffix)
12079 self.cfg.SetDiskID(disk, self.target_node)
12081 # Now that the new lvs have the old name, we can add them to the device
12082 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12083 result = self.rpc.call_blockdev_addchildren(self.target_node,
12084 (dev, self.instance), new_lvs)
12085 msg = result.fail_msg
12086 if msg:
12087 for new_lv in new_lvs:
12088 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12089 new_lv).fail_msg
12090 if msg2:
12091 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12092 hint=("cleanup manually the unused logical"
12094 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12096 cstep = itertools.count(5)
12098 if self.early_release:
12099 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12100 self._RemoveOldStorage(self.target_node, iv_names)
12101 # TODO: Check if releasing locks early still makes sense
12102 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12104 # Release all resource locks except those used by the instance
12105 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12106 keep=self.node_secondary_ip.keys())
12108 # Release all node locks while waiting for sync
12109 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12111 # TODO: Can the instance lock be downgraded here? Take the optional disk
12112 # shutdown in the caller into consideration.
12115 # This can fail as the old devices are degraded and _WaitForSync
12116 # does a combined result over all disks, so we don't check its return value
12117 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12118 _WaitForSync(self.lu, self.instance)
12120 # Check all devices manually
12121 self._CheckDevices(self.instance.primary_node, iv_names)
12123 # Step: remove old storage
12124 if not self.early_release:
12125 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12126 self._RemoveOldStorage(self.target_node, iv_names)
12128 def _ExecDrbd8Secondary(self, feedback_fn):
12129 """Replace the secondary node for DRBD 8.
12131 The algorithm for replace is quite complicated:
12132 - for all disks of the instance:
12133 - create new LVs on the new node with same names
12134 - shutdown the drbd device on the old secondary
12135 - disconnect the drbd network on the primary
12136 - create the drbd device on the new secondary
12137 - network attach the drbd on the primary, using an artifice:
12138 the drbd code for Attach() will connect to the network if it
12139 finds a device which is connected to the good local disks but
12140 not network enabled
12141 - wait for sync across all devices
12142 - remove all disks from the old secondary
12144 Failures are not very well handled.
12149 pnode = self.instance.primary_node
12151 # Step: check device activation
12152 self.lu.LogStep(1, steps_total, "Check device existence")
12153 self._CheckDisksExistence([self.instance.primary_node])
12154 self._CheckVolumeGroup([self.instance.primary_node])
12156 # Step: check other node consistency
12157 self.lu.LogStep(2, steps_total, "Check peer consistency")
12158 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12160 # Step: create new storage
12161 self.lu.LogStep(3, steps_total, "Allocate new storage")
12162 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12163 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12164 for idx, dev in enumerate(disks):
12165 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12166 (self.new_node, idx))
12167 # we pass force_create=True to force LVM creation
12168 for new_lv in dev.children:
12169 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12170 True, _GetInstanceInfoText(self.instance), False,
12173 # Step 4: drbd minors and drbd setup changes
12174 # after this, we must manually remove the drbd minors on both the
12175 # error and the success paths
12176 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12177 minors = self.cfg.AllocateDRBDMinor([self.new_node
12178 for dev in self.instance.disks],
12179 self.instance.name)
12180 logging.debug("Allocated minors %r", minors)
12182 iv_names = {}
12183 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12184 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12185 (self.new_node, idx))
12186 # create new devices on new_node; note that we create two IDs:
12187 # one without port, so the drbd will be activated without
12188 # networking information on the new node at this stage, and one
12189 # with network, for the latter activation in step 4
12190 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12191 if self.instance.primary_node == o_node1:
12192 p_minor = o_minor1
12193 else:
12194 assert self.instance.primary_node == o_node2, "Three-node instance?"
12195 p_minor = o_minor2
12197 new_alone_id = (self.instance.primary_node, self.new_node, None,
12198 p_minor, new_minor, o_secret)
12199 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12200 p_minor, new_minor, o_secret)
12202 iv_names[idx] = (dev, dev.children, new_net_id)
12203 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12205 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12206 logical_id=new_alone_id,
12207 children=dev.children,
12210 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12213 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12215 _GetInstanceInfoText(self.instance), False,
12217 except errors.GenericError:
12218 self.cfg.ReleaseDRBDMinors(self.instance.name)
12221 # We have new devices, shutdown the drbd on the old secondary
12222 for idx, dev in enumerate(self.instance.disks):
12223 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12224 self.cfg.SetDiskID(dev, self.target_node)
12225 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12226 (dev, self.instance)).fail_msg
12227 if msg:
12228 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12229 " node: %s" % (idx, msg),
12230 hint=("Please cleanup this device manually as"
12231 " soon as possible"))
12233 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12234 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12235 self.instance.disks)[pnode]
12237 msg = result.fail_msg
12238 if msg:
12239 # detaches didn't succeed (unlikely)
12240 self.cfg.ReleaseDRBDMinors(self.instance.name)
12241 raise errors.OpExecError("Can't detach the disks from the network on"
12242 " old node: %s" % (msg,))
12244 # if we managed to detach at least one, we update all the disks of
12245 # the instance to point to the new secondary
12246 self.lu.LogInfo("Updating instance configuration")
12247 for dev, _, new_logical_id in iv_names.itervalues():
12248 dev.logical_id = new_logical_id
12249 self.cfg.SetDiskID(dev, self.instance.primary_node)
12251 self.cfg.Update(self.instance, feedback_fn)
12253 # Release all node locks (the configuration has been updated)
12254 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12256 # and now perform the drbd attach
12257 self.lu.LogInfo("Attaching primary drbds to new secondary"
12258 " (standalone => connected)")
12259 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12261 self.node_secondary_ip,
12262 (self.instance.disks, self.instance),
12263 self.instance.name,
12265 for to_node, to_result in result.items():
12266 msg = to_result.fail_msg
12267 if msg:
12268 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12269 to_node, msg,
12270 hint=("please do a gnt-instance info to see the"
12271 " status of disks"))
12273 cstep = itertools.count(5)
12275 if self.early_release:
12276 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12277 self._RemoveOldStorage(self.target_node, iv_names)
12278 # TODO: Check if releasing locks early still makes sense
12279 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12281 # Release all resource locks except those used by the instance
12282 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12283 keep=self.node_secondary_ip.keys())
12285 # TODO: Can the instance lock be downgraded here? Take the optional disk
12286 # shutdown in the caller into consideration.
12289 # This can fail as the old devices are degraded and _WaitForSync
12290 # does a combined result over all disks, so we don't check its return value
12291 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12292 _WaitForSync(self.lu, self.instance)
12294 # Check all devices manually
12295 self._CheckDevices(self.instance.primary_node, iv_names)
12297 # Step: remove old storage
12298 if not self.early_release:
12299 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12300 self._RemoveOldStorage(self.target_node, iv_names)
12303 class LURepairNodeStorage(NoHooksLU):
12304 """Repairs the volume group on a node.
12309 def CheckArguments(self):
12310 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12312 storage_type = self.op.storage_type
12314 if (constants.SO_FIX_CONSISTENCY not in
12315 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12316 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12317 " repaired" % storage_type,
12318 errors.ECODE_INVAL)
12320 def ExpandNames(self):
12321 self.needed_locks = {
12322 locking.LEVEL_NODE: [self.op.node_name],
12325 def _CheckFaultyDisks(self, instance, node_name):
12326 """Ensure faulty disks abort the opcode or at least warn."""
12328 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12330 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12331 " node '%s'" % (instance.name, node_name),
12332 errors.ECODE_STATE)
12333 except errors.OpPrereqError, err:
12334 if self.op.ignore_consistency:
12335 self.LogWarning(str(err.args[0]))
12339 def CheckPrereq(self):
12340 """Check prerequisites.
12343 # Check whether any instance on this node has faulty disks
12344 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12345 if inst.admin_state != constants.ADMINST_UP:
12347 check_nodes = set(inst.all_nodes)
12348 check_nodes.discard(self.op.node_name)
12349 for inst_node_name in check_nodes:
12350 self._CheckFaultyDisks(inst, inst_node_name)
12352 def Exec(self, feedback_fn):
12353 feedback_fn("Repairing storage unit '%s' on %s ..." %
12354 (self.op.name, self.op.node_name))
12356 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12357 result = self.rpc.call_storage_execute(self.op.node_name,
12358 self.op.storage_type, st_args,
12360 constants.SO_FIX_CONSISTENCY)
12361 result.Raise("Failed to repair storage unit '%s' on %s" %
12362 (self.op.name, self.op.node_name))
12365 class LUNodeEvacuate(NoHooksLU):
12366 """Evacuates instances off a list of nodes.
12371 _MODE2IALLOCATOR = {
12372 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12373 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12374 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12376 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12377 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12378 constants.IALLOCATOR_NEVAC_MODES)
12380 def CheckArguments(self):
12381 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12383 def ExpandNames(self):
12384 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12386 if self.op.remote_node is not None:
12387 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12388 assert self.op.remote_node
12390 if self.op.remote_node == self.op.node_name:
12391 raise errors.OpPrereqError("Can not use evacuated node as a new"
12392 " secondary node", errors.ECODE_INVAL)
12394 if self.op.mode != constants.NODE_EVAC_SEC:
12395 raise errors.OpPrereqError("Without the use of an iallocator only"
12396 " secondary instances can be evacuated",
12397 errors.ECODE_INVAL)
12400 self.share_locks = _ShareAll()
12401 self.needed_locks = {
12402 locking.LEVEL_INSTANCE: [],
12403 locking.LEVEL_NODEGROUP: [],
12404 locking.LEVEL_NODE: [],
12407 # Determine nodes (via group) optimistically, needs verification once locks
12408 # have been acquired
12409 self.lock_nodes = self._DetermineNodes()
12411 def _DetermineNodes(self):
12412 """Gets the list of nodes to operate on.
12415 if self.op.remote_node is None:
12416 # Iallocator will choose any node(s) in the same group
12417 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12419 group_nodes = frozenset([self.op.remote_node])
12421 # Determine nodes to be locked
12422 return set([self.op.node_name]) | group_nodes
12424 def _DetermineInstances(self):
12425 """Builds list of instances to operate on.
12428 assert self.op.mode in constants.NODE_EVAC_MODES
12430 if self.op.mode == constants.NODE_EVAC_PRI:
12431 # Primary instances only
12432 inst_fn = _GetNodePrimaryInstances
12433 assert self.op.remote_node is None, \
12434 "Evacuating primary instances requires iallocator"
12435 elif self.op.mode == constants.NODE_EVAC_SEC:
12436 # Secondary instances only
12437 inst_fn = _GetNodeSecondaryInstances
12440 assert self.op.mode == constants.NODE_EVAC_ALL
12441 inst_fn = _GetNodeInstances
12442 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12444 raise errors.OpPrereqError("Due to an issue with the iallocator"
12445 " interface it is not possible to evacuate"
12446 " all instances at once; specify explicitly"
12447 " whether to evacuate primary or secondary"
12448 " instances",
12449 errors.ECODE_INVAL)
12451 return inst_fn(self.cfg, self.op.node_name)
12453 def DeclareLocks(self, level):
12454 if level == locking.LEVEL_INSTANCE:
12455 # Lock instances optimistically, needs verification once node and group
12456 # locks have been acquired
12457 self.needed_locks[locking.LEVEL_INSTANCE] = \
12458 set(i.name for i in self._DetermineInstances())
12460 elif level == locking.LEVEL_NODEGROUP:
12461 # Lock node groups for all potential target nodes optimistically, needs
12462 # verification once nodes have been acquired
12463 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12464 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12466 elif level == locking.LEVEL_NODE:
12467 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12469 def CheckPrereq(self):
12471 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12472 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12473 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12475 need_nodes = self._DetermineNodes()
12477 if not owned_nodes.issuperset(need_nodes):
12478 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12479 " locks were acquired, current nodes are"
12480 " '%s', used to be '%s'; retry the"
12482 (self.op.node_name,
12483 utils.CommaJoin(need_nodes),
12484 utils.CommaJoin(owned_nodes)),
12485 errors.ECODE_STATE)
12487 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12488 if owned_groups != wanted_groups:
12489 raise errors.OpExecError("Node groups changed since locks were acquired,"
12490 " current groups are '%s', used to be '%s';"
12491 " retry the operation" %
12492 (utils.CommaJoin(wanted_groups),
12493 utils.CommaJoin(owned_groups)))
12495 # Determine affected instances
12496 self.instances = self._DetermineInstances()
12497 self.instance_names = [i.name for i in self.instances]
12499 if set(self.instance_names) != owned_instances:
12500 raise errors.OpExecError("Instances on node '%s' changed since locks"
12501 " were acquired, current instances are '%s',"
12502 " used to be '%s'; retry the operation" %
12503 (self.op.node_name,
12504 utils.CommaJoin(self.instance_names),
12505 utils.CommaJoin(owned_instances)))
12507 if self.instance_names:
12508 self.LogInfo("Evacuating instances from node '%s': %s",
12510 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12512 self.LogInfo("No instances to evacuate from node '%s'",
12515 if self.op.remote_node is not None:
12516 for i in self.instances:
12517 if i.primary_node == self.op.remote_node:
12518 raise errors.OpPrereqError("Node %s is the primary node of"
12519 " instance %s, cannot use it as"
12521 (self.op.remote_node, i.name),
12522 errors.ECODE_INVAL)
12524 def Exec(self, feedback_fn):
12525 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12527 if not self.instance_names:
12528 # No instances to evacuate
12531 elif self.op.iallocator is not None:
12532 # TODO: Implement relocation to other group
12533 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12534 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12535 instances=list(self.instance_names))
12536 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12538 ial.Run(self.op.iallocator)
12540 if not ial.success:
12541 raise errors.OpPrereqError("Can't compute node evacuation using"
12542 " iallocator '%s': %s" %
12543 (self.op.iallocator, ial.info),
12544 errors.ECODE_NORES)
12546 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12548 elif self.op.remote_node is not None:
12549 assert self.op.mode == constants.NODE_EVAC_SEC
12551 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12552 remote_node=self.op.remote_node,
12554 mode=constants.REPLACE_DISK_CHG,
12555 early_release=self.op.early_release)]
12556 for instance_name in self.instance_names]
12559 raise errors.ProgrammerError("No iallocator or remote node")
12561 return ResultWithJobs(jobs)
12564 def _SetOpEarlyRelease(early_release, op):
12565 """Sets C{early_release} flag on opcodes if available.
12568 try:
12569 op.early_release = early_release
12570 except AttributeError:
12571 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12573 return op
12576 def _NodeEvacDest(use_nodes, group, nodes):
12577 """Returns group or nodes depending on caller's choice.
12580 if use_nodes:
12581 return utils.CommaJoin(nodes)
12582 else:
12583 return group
12586 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12587 """Unpacks the result of change-group and node-evacuate iallocator requests.
12589 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12590 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12592 @type lu: L{LogicalUnit}
12593 @param lu: Logical unit instance
12594 @type alloc_result: tuple/list
12595 @param alloc_result: Result from iallocator
12596 @type early_release: bool
12597 @param early_release: Whether to release locks early if possible
12598 @type use_nodes: bool
12599 @param use_nodes: Whether to display node names instead of groups
12602 (moved, failed, jobs) = alloc_result
12604 if failed:
12605 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12606 for (name, reason) in failed)
12607 lu.LogWarning("Unable to evacuate instances %s", failreason)
12608 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12610 if moved:
12611 lu.LogInfo("Instances to be moved: %s",
12612 utils.CommaJoin("%s (to %s)" %
12613 (name, _NodeEvacDest(use_nodes, group, nodes))
12614 for (name, group, nodes) in moved))
12616 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12617 map(opcodes.OpCode.LoadOpCode, ops))
12618 for ops in jobs]
12621 def _DiskSizeInBytesToMebibytes(lu, size):
12622 """Converts a disk size in bytes to mebibytes.
12624 Warns and rounds up if the size isn't an even multiple of 1 MiB.
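12625 For example, 1073741825 bytes (1 GiB plus one byte) is rounded up to 1025 MiB.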
12627 (mib, remainder) = divmod(size, 1024 * 1024)
12629 if remainder != 0:
12630 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12631 " to not overwrite existing data (%s bytes will not be"
12632 " wiped)", (1024 * 1024) - remainder)
12633 mib += 1
12635 return mib
12638 class LUInstanceGrowDisk(LogicalUnit):
12639 """Grow a disk of an instance.
12642 HPATH = "disk-grow"
12643 HTYPE = constants.HTYPE_INSTANCE
12646 def ExpandNames(self):
12647 self._ExpandAndLockInstance()
12648 self.needed_locks[locking.LEVEL_NODE] = []
12649 self.needed_locks[locking.LEVEL_NODE_RES] = []
12650 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12651 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12653 def DeclareLocks(self, level):
12654 if level == locking.LEVEL_NODE:
12655 self._LockInstancesNodes()
12656 elif level == locking.LEVEL_NODE_RES:
12658 self.needed_locks[locking.LEVEL_NODE_RES] = \
12659 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12661 def BuildHooksEnv(self):
12662 """Build hooks env.
12664 This runs on the master, the primary and all the secondaries.
12667 env = {
12668 "DISK": self.op.disk,
12669 "AMOUNT": self.op.amount,
12670 "ABSOLUTE": self.op.absolute,
12671 }
12672 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12673 return env
12675 def BuildHooksNodes(self):
12676 """Build hooks nodes.
12679 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12680 return nl, nl
12682 def CheckPrereq(self):
12683 """Check prerequisites.
12685 This checks that the instance is in the cluster.
12688 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12689 assert instance is not None, \
12690 "Cannot retrieve locked instance %s" % self.op.instance_name
12691 nodenames = list(instance.all_nodes)
12692 for node in nodenames:
12693 _CheckNodeOnline(self, node)
12695 self.instance = instance
12697 if instance.disk_template not in constants.DTS_GROWABLE:
12698 raise errors.OpPrereqError("Instance's disk layout does not support"
12699 " growing", errors.ECODE_INVAL)
12701 self.disk = instance.FindDisk(self.op.disk)
12703 if self.op.absolute:
12704 self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
12708 "current disk size (%s)" %
12709 (utils.FormatUnit(self.target, "h"),
12710 utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
12717 utils.FormatUnit(self.delta, "h"),
12718 errors.ECODE_INVAL)
12720 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
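  # Illustrative sketch (not part of the original code): for a 10240 MiB disk,
  # the two sizing modes accepted by this LU work out as follows.
  #
  #   self.op.absolute == False, self.op.amount == 2048:
  #     delta  = 2048
  #     target = 10240 + 2048 = 12288
  #   self.op.absolute == True, self.op.amount == 12288:
  #     target = 12288
  #     delta  = 12288 - 10240 = 2048
  #
  # Either way a negative delta is rejected by the checks above.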
12722 def _CheckDiskSpace(self, nodenames, req_vgspace):
12723 template = self.instance.disk_template
12724 if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      nodes = map(self.cfg.GetNodeInfo, nodenames)
      es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
                        nodes)
      if es_nodes:
        # With exclusive storage we need something smarter than just looking
        # at free space; for now, let's simply abort the operation.
12733 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12734 " is enabled", errors.ECODE_STATE)
12735 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12737 def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
12744 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12745 assert (self.owned_locks(locking.LEVEL_NODE) ==
12746 self.owned_locks(locking.LEVEL_NODE_RES))
12748 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")
12754 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12755 (self.op.disk, instance.name,
12756 utils.FormatUnit(self.delta, "h"),
12757 utils.FormatUnit(self.target, "h")))
12759 # First run all grow ops in dry-run mode
12760 for node in instance.all_nodes:
12761 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)
    if wipe_disks:
      # Get disk size from primary node for wiping
12768 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12769 result.Raise("Failed to retrieve disk size from node '%s'" %
12770 instance.primary_node)
12772 (disk_size_in_bytes, ) = result.payload
12774 if disk_size_in_bytes is None:
12775 raise errors.OpExecError("Failed to retrieve disk size from primary"
12776 " node '%s'" % instance.primary_node)
12778 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12780 assert old_disk_size >= disk.size, \
12781 ("Retrieved disk size too small (got %s, should be at least %s)" %
12782 (old_disk_size, disk.size))
    else:
      old_disk_size = None
12786 # We know that (as far as we can test) operations across different
12787 # nodes will succeed, time to run it for real on the backing storage
12788 for node in instance.all_nodes:
12789 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)
12794 # And now execute it for logical storage, on the primary node
12795 node = instance.primary_node
12796 self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)
12801 disk.RecordGrow(self.delta)
12802 self.cfg.Update(instance, feedback_fn)
12804 # Changes have been recorded, release node lock
12805 _ReleaseLocks(self, locking.LEVEL_NODE)
12807 # Downgrade lock while waiting for sync
12808 self.glm.downgrade(locking.LEVEL_INSTANCE)
    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk
12815 # Wipe newly added disk space
12816 _WipeDisks(self, instance,
12817 disks=[(self.op.disk, disk, old_disk_size)])
12819 if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
        if instance.admin_state != constants.ADMINST_UP:
12825 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12826 elif instance.admin_state != constants.ADMINST_UP:
12827 self.LogWarning("Not shutting down the disk even if the instance is"
12828 " not supposed to be running because no wait for"
12829 " sync mode was requested")
12831 assert self.owned_locks(locking.LEVEL_NODE_RES)
12832 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12835 class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
12842 self.needed_locks = {}
12844 # Use locking if requested or when non-static information is wanted
12845 if not (self.op.static or self.op.use_locking):
12846 self.LogWarning("Non-static data requested, locks need to be acquired")
12847 self.op.use_locking = True
12849 if self.op.instances or not self.op.use_locking:
12850 # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None
12856 if self.op.use_locking:
12857 self.share_locks = _ShareAll()
12859 if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12864 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12865 self.needed_locks[locking.LEVEL_NODE] = []
12866 self.needed_locks[locking.LEVEL_NETWORK] = []
12867 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12869 def DeclareLocks(self, level):
12870 if self.op.use_locking:
12871 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12872 if level == locking.LEVEL_NODEGROUP:
12874 # Lock all groups used by instances optimistically; this requires going
12875 # via the node before it's locked, requiring verification later on
12876 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12877 frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))
12882 elif level == locking.LEVEL_NODE:
12883 self._LockInstancesNodes()
12885 elif level == locking.LEVEL_NETWORK:
        self.needed_locks[locking.LEVEL_NETWORK] = \
          frozenset(net_uuid
                    for instance_name in owned_instances
                    for net_uuid in
                      self.cfg.GetInstanceNetworks(instance_name))
12892 def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12899 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12900 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12901 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12903 if self.wanted_names is None:
12904 assert self.op.use_locking, "Locking was not used"
12905 self.wanted_names = owned_instances
12907 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12909 if self.op.use_locking:
12910 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
    else:
      assert not (owned_instances or owned_groups or
12914 owned_nodes or owned_networks)
12916 self.wanted_instances = instances.values()
  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)
    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None
    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
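  # Note added for clarity (not in the original source): when the device can be
  # queried, the returned tuple mirrors objects.BlockDevStatus in this order:
  #   (dev_path, major, minor, sync_percent, estimated_time,
  #    is_degraded, ldisk_status)
  # and None is returned for static queries, missing nodes, offline RPC results
  # or devices that were not found.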
12941 def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12947 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12949 def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
12956 # we change the snode then (otherwise we use the one passed in)
12957 if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }
12985 def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}
12989 cluster = self.cfg.GetClusterInfo()
12991 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12992 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12994 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12995 for node in nodes.values()))
12997 group2name_fn = lambda uuid: groups[uuid].name
12998 for instance in self.wanted_instances:
12999 pnode = nodes[instance.primary_node]
13001 if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
13005 " information only for instance %s" %
13006 (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
13010 instance.hypervisor)
13011 remote_info.Raise("Error checking node %s" % instance.primary_node)
13012 remote_info = remote_info.payload
13013 if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)
13024 snodes_group_uuids = [nodes[snode_name].group
13025 for snode_name in instance.secondary_nodes]
13027 result[instance.name] = {
13028 "name": instance.name,
13029 "config_state": instance.admin_state,
13030 "run_state": remote_state,
13031 "pnode": instance.primary_node,
13032 "pnode_group_uuid": pnode.group,
13033 "pnode_group_name": group2name_fn(pnode.group),
13034 "snodes": instance.secondary_nodes,
13035 "snodes_group_uuids": snodes_group_uuids,
13036 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
13038 # this happens to be the same format used for hooks
13039 "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
13043 "network_port": instance.network_port,
13044 "hv_instance": instance.hvparams,
13045 "hv_actual": cluster.FillHV(instance, skip_globals=True),
13046 "be_instance": instance.beparams,
13047 "be_actual": cluster.FillBE(instance),
13048 "os_instance": instance.osparams,
13049 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
13050 "serial_no": instance.serial_no,
13051 "mtime": instance.mtime,
13052 "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
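# Illustrative example (not part of the original module): PrepareContainerMods
# only widens each (op, index, params) tuple with a per-modification private
# object, e.g. for NIC changes:
#
#   mods = [(constants.DDM_ADD, -1, {"mac": "auto"})]
#   PrepareContainerMods(mods, _InstNicModPrivate)
#   # -> [(constants.DDM_ADD, -1, {"mac": "auto"}, <_InstNicModPrivate>)]
#
# With private_fn=None the fourth element is simply None.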
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
13087 def ApplyContainerMods(kind, container, chgdesc, mods,
13088 create_fn, modify_fn, remove_fn):
13089 """Applies descriptions in C{mods} to C{container}.
13092 @param kind: One-word item description
13093 @type container: list
13094 @param container: Container to modify
13095 @type chgdesc: None or list
13096 @param chgdesc: List of applied changes
13098 @param mods: Modifications as returned by L{PrepareContainerMods}
13099 @type create_fn: callable
13100 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13101 receives absolute item index, parameters and private data object as added
13102 by L{PrepareContainerMods}, returns tuple containing new item and changes
13104 @type modify_fn: callable
13105 @param modify_fn: Callback for modifying an existing item
13106 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13107 and private data object as added by L{PrepareContainerMods}, returns
13109 @type remove_fn: callable
13110 @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx
    changes = None
    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx
      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)
      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))
13154 if op == constants.DDM_REMOVE:
13157 if remove_fn is not None:
13158 remove_fn(absidx, item, private)
13160 changes = [("%s/%s" % (kind, absidx), "remove")]
13162 assert container[absidx] == item
13163 del container[absidx]
13164 elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13170 assert _TApplyContModsCbChanges(changes)
13172 if not (chgdesc is None or changes is None):
13173 chgdesc.extend(changes)
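# Usage sketch (added for illustration; simplified callbacks, not the real
# disk/NIC ones): ApplyContainerMods mutates the container in place and
# appends human-readable change descriptions to chgdesc.
#
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("item", container, chgdesc, mods, None, None, None)
#   # container == ["b"], chgdesc == [("item/0", "remove")]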
13176 def _UpdateIvNames(base_index, disks):
13177 """Updates the C{iv_name} attribute of disks.
  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
13183 disk.iv_name = "disk/%s" % (base_index + idx, )
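# Example (illustrative only): _UpdateIvNames(0, disks) on a two-disk list sets
# iv_name to "disk/0" and "disk/1"; a non-zero base_index simply shifts the
# numbering, e.g. base_index=5 yields "disk/5" and "disk/6".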
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None
13197 class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []
      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
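  # Illustrative example (not part of the original code): upgrading the legacy
  # two-element modification format to the indexed three-element one.
  #
  #   [(constants.DDM_ADD, {"size": 1024})]
  #     -> [(constants.DDM_ADD, -1, {"size": 1024})]
  #   [(0, {"mode": "ro"})]          # "modify item 0" in the old format
  #     -> [(constants.DDM_MODIFY, 0, {"mode": "ro"})]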
  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
13261 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13262 if mode not in constants.DISK_ACCESS_SET:
13263 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13264 errors.ECODE_INVAL)
      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
13274 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13275 errors.ECODE_INVAL)
13277 params[constants.IDISK_SIZE] = size
13279 elif op == constants.DDM_MODIFY:
13280 if constants.IDISK_SIZE in params:
13281 raise errors.OpPrereqError("Disk size change not possible, use"
13282 " grow-disk", errors.ECODE_INVAL)
13283 if constants.IDISK_MODE not in params:
13284 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13285 " modification supported, but missing",
13286 errors.ECODE_NOENT)
13287 if len(params) > 1:
13288 raise errors.OpPrereqError("Disk modification doesn't support"
13289 " additional arbitrary parameters",
13290 errors.ECODE_INVAL)
  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13298 ip = params.get(constants.INIC_IP, None)
13299 req_net = params.get(constants.INIC_NETWORK, None)
13300 link = params.get(constants.NIC_LINK, None)
13301 mode = params.get(constants.NIC_MODE, None)
13302 if req_net is not None:
13303 if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
13307 raise errors.OpPrereqError("If network is given"
13308 " mode or link should not",
13309 errors.ECODE_INVAL)
13311 if op == constants.DDM_ADD:
13312 macaddr = params.get(constants.INIC_MAC, None)
13313 if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
13321 if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
13327 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13328 errors.ECODE_INVAL)
13330 if constants.INIC_MAC in params:
13331 macaddr = params[constants.INIC_MAC]
13332 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13333 macaddr = utils.NormalizeAndValidateMac(macaddr)
13335 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13336 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13337 " modifying an existing NIC",
13338 errors.ECODE_INVAL)
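  # Illustrative example (not in the original source): normalisation performed
  # on an "add NIC" request.  Assuming
  #
  #   params = {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1"}
  #
  # the method fills in constants.INIC_MAC = constants.VALUE_AUTO (no MAC was
  # given) and keeps ip="pool" because a network is present; it would raise
  # OpPrereqError instead if INIC_NETWORK were missing or the IP were not a
  # valid address.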
13340 def CheckArguments(self):
13341 if not (self.op.nics or self.op.disks or self.op.disk_template or
13342 self.op.hvparams or self.op.beparams or self.op.os_name or
13343 self.op.offline is not None or self.op.runtime_mem):
13344 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13346 if self.op.hvparams:
13347 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13348 "hypervisor", "instance", "cluster")
13350 self.op.disks = self._UpgradeDiskNicMods(
13351 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13352 self.op.nics = self._UpgradeDiskNicMods(
13353 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13355 if self.op.disks and self.op.disk_template is not None:
13356 raise errors.OpPrereqError("Disk template conversion and other disk"
13357 " changes not supported at the same time",
13358 errors.ECODE_INVAL)
13360 if (self.op.disk_template and
13361 self.op.disk_template in constants.DTS_INT_MIRROR and
13362 self.op.remote_node is None):
13363 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13364 " one requires specifying a secondary node",
13365 errors.ECODE_INVAL)
13367 # Check NIC modifications
13368 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13369 self._VerifyNicModification)
13371 def ExpandNames(self):
13372 self._ExpandAndLockInstance()
13373 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13374 # Can't even acquire node locks in shared mode as upcoming changes in
13375 # Ganeti 2.6 will start to modify the node object on disk conversion
13376 self.needed_locks[locking.LEVEL_NODE] = []
13377 self.needed_locks[locking.LEVEL_NODE_RES] = []
13378 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # The node group is needed (shared) to be able to look up the ipolicy
13380 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13382 def DeclareLocks(self, level):
13383 if level == locking.LEVEL_NODEGROUP:
13384 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13385 # Acquire locks for the instance's nodegroups optimistically. Needs
13386 # to be verified in CheckPrereq
13387 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13388 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13389 elif level == locking.LEVEL_NODE:
13390 self._LockInstancesNodes()
13391 if self.op.disk_template and self.op.remote_node:
13392 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13393 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13394 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13396 self.needed_locks[locking.LEVEL_NODE_RES] = \
13397 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13399 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
13407 args["minmem"] = self.be_new[constants.BE_MINMEM]
13408 if constants.BE_MAXMEM in self.be_new:
13409 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13410 if constants.BE_VCPUS in self.be_new:
13411 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13412 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13413 # information at all.
    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
13419 n = copy.deepcopy(nic)
13420 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13421 n.nicparams = nicparams
13422 nics.append(_NICToTuple(self, n))
13424 args["nics"] = nics
13426 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13427 if self.op.disk_template:
13428 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13429 if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env
13434 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
13441 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13442 old_params, cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])
13448 req_link = update_params_dict.get(constants.NIC_LINK, None)
13449 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13451 new_net_uuid = None
13452 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13453 if new_net_uuid_or_name:
13454 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
      new_net_obj = self.cfg.GetNetwork(new_net_uuid)

    if old_net_uuid:
      old_net_obj = self.cfg.GetNetwork(old_net_uuid)

    if new_net_uuid:
      netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
      if not netparams:
        raise errors.OpPrereqError("No netparams found for the network"
13464 " %s, probably not connected" %
13465 new_net_obj.name, errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)
13470 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13472 new_filled_params = cluster.SimpleFillNIC(new_params)
13473 objects.NIC.CheckParameterSyntax(new_filled_params)
13475 new_mode = new_filled_params[constants.NIC_MODE]
13476 if new_mode == constants.NIC_MODE_BRIDGED:
13477 bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13486 elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13490 " on a routed NIC", errors.ECODE_INVAL)
13492 elif new_mode == constants.NIC_MODE_OVS:
13493 # TODO: check OVS link
13494 self.LogInfo("OVS links are currently not checked for correctness")
13496 if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13500 errors.ECODE_INVAL)
13501 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13502 # otherwise generate the MAC address
13503 params[constants.INIC_MAC] = \
13504 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
13509 except errors.ReservationError:
13510 raise errors.OpPrereqError("MAC address '%s' already in use"
13511 " in cluster" % mac,
13512 errors.ECODE_NOTUNIQUE)
13513 elif new_net_uuid != old_net_uuid:
      def get_net_prefix(net_uuid):
        mac_prefix = None
        if net_uuid:
          nobj = self.cfg.GetNetwork(net_uuid)
          mac_prefix = nobj.mac_prefix

        return mac_prefix
13523 new_prefix = get_net_prefix(new_net_uuid)
13524 old_prefix = get_net_prefix(old_net_uuid)
13525 if old_prefix != new_prefix:
13526 params[constants.INIC_MAC] = \
13527 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13529 # if there is a change in (ip, network) tuple
13530 new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
      if new_ip:
        # if IP is pool then require a network and generate one IP
        if new_ip.lower() == constants.NIC_IP_POOL:
          if new_net_uuid:
            try:
              new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13538 except errors.ReservationError:
13539 raise errors.OpPrereqError("Unable to get a free IP"
13540 " from the address pool",
13541 errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from network %s",
                         new_ip,
                         new_net_obj.name)
            params[constants.INIC_IP] = new_ip
          else:
            raise errors.OpPrereqError("ip=pool, but no network found",
13548 errors.ECODE_INVAL)
        # Reserve new IP if in the new network if any
        elif new_net_uuid:
          try:
            self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13553 self.LogInfo("Reserving IP %s in network %s",
13554 new_ip, new_net_obj.name)
13555 except errors.ReservationError:
13556 raise errors.OpPrereqError("IP %s not available in network %s" %
13557 (new_ip, new_net_obj.name),
13558 errors.ECODE_NOTUNIQUE)
13559 # new network is None so check if new IP is a conflicting IP
13560 elif self.op.conflicts_check:
13561 _CheckForConflictingIp(self, new_ip, pnode)
13563 # release old IP if old network is not None
      if old_ip and old_net_uuid:
        try:
          self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13567 except errors.AddressPoolError:
13568 logging.warning("Release IP %s not contained in network %s",
13569 old_ip, old_net_obj.name)
13571 # there are no changes in (ip, network) tuple and old network is not None
13572 elif (old_net_uuid is not None and
13573 (req_link is not None or req_mode is not None)):
13574 raise errors.OpPrereqError("Not allowed to change link or mode of"
13575 " a NIC that is connected to a network",
13576 errors.ECODE_INVAL)
13578 private.params = new_params
13579 private.filled = new_filled_params
13581 def _PreCheckDiskTemplate(self, pnode_info):
13582 """CheckPrereq checks related to a new disk template."""
13583 # Arguments are passed to avoid configuration lookups
13584 instance = self.instance
13585 pnode = instance.primary_node
13586 cluster = self.cluster
13587 if instance.disk_template == self.op.disk_template:
13588 raise errors.OpPrereqError("Instance already has disk template %s" %
13589 instance.disk_template, errors.ECODE_INVAL)
13591 if (instance.disk_template,
13592 self.op.disk_template) not in self._DISK_CONVERSIONS:
13593 raise errors.OpPrereqError("Unsupported disk template conversion from"
13594 " %s to %s" % (instance.disk_template,
13595 self.op.disk_template),
13596 errors.ECODE_INVAL)
13597 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13598 msg="cannot change disk template")
13599 if self.op.disk_template in constants.DTS_INT_MIRROR:
13600 if self.op.remote_node == pnode:
13601 raise errors.OpPrereqError("Given new secondary node %s is the same"
13602 " as the primary node of the instance" %
13603 self.op.remote_node, errors.ECODE_STATE)
13604 _CheckNodeOnline(self, self.op.remote_node)
13605 _CheckNodeNotDrained(self, self.op.remote_node)
13606 # FIXME: here we assume that the old instance type is DT_PLAIN
13607 assert instance.disk_template == constants.DT_PLAIN
13608 disks = [{constants.IDISK_SIZE: d.size,
13609 constants.IDISK_VG: d.logical_id[0]}
13610 for d in instance.disks]
13611 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13612 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13614 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13615 snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
13618 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
13619 ignore=self.op.ignore_ipolicy)
13620 if pnode_info.group != snode_info.group:
13621 self.LogWarning("The primary and secondary nodes are in two"
13622 " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13627 # Make sure none of the nodes require exclusive storage
13628 nodes = [pnode_info]
13629 if self.op.disk_template in constants.DTS_INT_MIRROR:
13631 nodes.append(snode_info)
13632 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13633 if compat.any(map(has_es, nodes)):
13634 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13635 " storage is enabled" % (instance.disk_template,
13636 self.op.disk_template))
13637 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13639 def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13646 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13648 cluster = self.cluster = self.cfg.GetClusterInfo()
13649 assert self.instance is not None, \
13650 "Cannot retrieve locked instance %s" % self.op.instance_name
13652 pnode = instance.primary_node
13653 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13654 nodelist = list(instance.all_nodes)
13655 pnode_info = self.cfg.GetNodeInfo(pnode)
13656 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13658 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13659 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13660 group_info = self.cfg.GetNodeGroup(pnode_info.group)
    # dictionary with instance information after the modification
    ispec = {}
13665 # Check disk modifications. This is done here and not in CheckArguments
13666 # (as with NICs), because we need to know the instance's disk template
13667 if instance.disk_template == constants.DT_EXT:
13668 self._CheckMods("disk", self.op.disks, {},
13669 self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13672 self._VerifyDiskModification)
13674 # Prepare disk/NIC modifications
13675 self.diskmod = PrepareContainerMods(self.op.disks, None)
13676 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13678 # Check the validity of the `provider' parameter
13679 if instance.disk_template in constants.DT_EXT:
13680 for mod in self.diskmod:
13681 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13682 if mod[0] == constants.DDM_ADD:
13683 if ext_provider is None:
13684 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13685 " '%s' missing, during disk add" %
13687 constants.IDISK_PROVIDER),
13688 errors.ECODE_NOENT)
13689 elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13693 constants.IDISK_PROVIDER,
13694 errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
13697 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13698 if ext_provider is not None:
13699 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13700 " instances of type '%s'" %
13701 (constants.IDISK_PROVIDER,
13703 errors.ECODE_INVAL)
13706 if self.op.os_name and not self.op.force:
13707 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13708 self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os
13713 assert not (self.op.disk_template and self.op.disks), \
13714 "Can't modify disk template and apply disk changes at the same time"
13716 if self.op.disk_template:
13717 self._PreCheckDiskTemplate(pnode_info)
13719 # hvparams processing
13720 if self.op.hvparams:
13721 hv_type = instance.hypervisor
13722 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13723 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13724 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13727 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13728 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13729 self.hv_proposed = self.hv_new = hv_new # the new actual values
13730 self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}
13736 # beparams processing
13737 if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
13741 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13742 be_new = cluster.SimpleFillBE(i_bedict)
13743 self.be_proposed = self.be_new = be_new # the new actual values
13744 self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
13747 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13748 be_old = cluster.FillBE(instance)
13750 # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13757 # Verify mask is consistent with number of vCPUs. Can skip this
13758 # test if only 1 entry in the CPU mask, which means same mask
13759 # is applied to all vCPUs.
13760 if (len(cpu_list) > 1 and
13761 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13762 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
13765 self.hv_proposed[constants.HV_CPU_MASK]),
13766 errors.ECODE_INVAL)
13768 # Only perform this test if a new CPU mask is given
13769 if constants.HV_CPU_MASK in self.hv_new:
13770 # Calculate the largest CPU number requested
13771 max_requested_cpu = max(map(max, cpu_list))
13772 # Check that all of the instance's nodes have enough physical CPUs to
13773 # satisfy the requested CPU mask
13774 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13775 max_requested_cpu + 1, instance.hypervisor)
13777 # osparams processing
13778 if self.op.osparams:
13779 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13780 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
13788 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13789 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13790 mem_check_list = [pnode]
13791 if be_new[constants.BE_AUTO_BALANCE]:
13792 # either we changed auto_balance to yes or it was from before
13793 mem_check_list.extend(instance.secondary_nodes)
13794 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13795 instance.hypervisor)
13796 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13797 [instance.hypervisor], False)
13798 pninfo = nodeinfo[pnode]
13799 msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
13806 if not isinstance(pnhvinfo.get("memory_free", None), int):
13807 self.warn.append("Node data from primary node %s doesn't contain"
13808 " free memory information" % pnode)
13809 elif instance_info.fail_msg:
13810 self.warn.append("Can't get instance runtime information: %s" %
13811 instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
13817 # (there is a slight race condition here, but it's not very
13818 # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

        #TODO(dynmem): do the appropriate check involving MINMEM
13822 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13823 pnhvinfo["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
13826 " from starting, due to %d MB of memory"
13827 " missing on its primary node" %
13828 miss_mem, errors.ECODE_NORES)
13830 if be_new[constants.BE_AUTO_BALANCE]:
13831 for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
13835 prereq=True, ecode=errors.ECODE_STATE)
13836 (_, _, (nhvinfo, )) = nres.payload
13837 if not isinstance(nhvinfo.get("memory_free", None), int):
13838 raise errors.OpPrereqError("Secondary node %s didn't return free"
13839 " memory information" % node,
13840 errors.ECODE_STATE)
13841 #TODO(dynmem): do the appropriate check involving MINMEM
13842 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13843 raise errors.OpPrereqError("This change will prevent the instance"
13844 " from failover to its secondary node"
13845 " %s, due to not enough memory" % node,
13846 errors.ECODE_STATE)
13848 if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
13852 remote_info.Raise("Error checking node %s" % instance.primary_node)
13853 if not remote_info.payload: # not running already
13854 raise errors.OpPrereqError("Instance %s is not running" %
13855 instance.name, errors.ECODE_STATE)
13857 current_memory = remote_info.payload["memory"]
13858 if (not self.op.force and
13859 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13860 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13861 raise errors.OpPrereqError("Instance %s must have memory between %d"
13862 " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
13866 self.be_proposed[constants.BE_MAXMEM]),
13867 errors.ECODE_INVAL)
13869 delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
13872 "ballooning memory for instance %s" %
13873 instance.name, delta, instance.hypervisor)
13875 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13876 raise errors.OpPrereqError("Disk operations not supported for"
13877 " diskless instances", errors.ECODE_INVAL)
13879 def _PrepareNicCreate(_, params, private):
13880 self._PrepareNicModification(params, private, None, None,
13881 {}, cluster, pnode)
13882 return (None, None)
13884 def _PrepareNicMod(_, nic, params, private):
13885 self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
13892 if net is not None and ip is not None:
13893 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13895 # Verify NIC changes (operating on copy)
13896 nics = instance.nics[:]
13897 ApplyContainerMods("NIC", nics, None, self.nicmod,
13898 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13899 if len(nics) > constants.MAX_NICS:
13900 raise errors.OpPrereqError("Instance has too many network interfaces"
13901 " (%d), cannot add more" % constants.MAX_NICS,
13902 errors.ECODE_STATE)
13904 # Verify disk changes (operating on a copy)
13905 disks = instance.disks[:]
13906 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13907 if len(disks) > constants.MAX_DISKS:
13908 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13909 " more" % constants.MAX_DISKS,
13910 errors.ECODE_STATE)
13911 disk_sizes = [disk.size for disk in instance.disks]
13912 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13913 self.diskmod if op == constants.DDM_ADD)
13914 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13915 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13917 if self.op.offline is not None and self.op.offline:
13918 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13919 msg="can't change to offline")
13921 # Pre-compute NIC changes (necessary to use result in hooks)
13922 self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
13926 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13927 self._CreateNewNic, self._ApplyNicMods, None)
13928 self._new_nics = nics
13929 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
13932 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13934 if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
13939 ispec[constants.ISPEC_SPINDLE_USE] = \
13940 self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
13945 if self.op.disk_template:
13946 new_disk_template = self.op.disk_template
      else:
        new_disk_template = instance.disk_template
13949 ispec_max = ispec.copy()
13950 ispec_max[constants.ISPEC_MEM_SIZE] = \
13951 self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
                                                     new_disk_template)
      ispec_min = ispec.copy()
13955 ispec_min[constants.ISPEC_MEM_SIZE] = \
13956 self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
                                                     new_disk_template)

      if (res_max or res_min):
13961 # FIXME: Improve error message by including information about whether
13962 # the upper or lower limit of the parameter fails the ipolicy.
13963 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13964 (group_info, group_info.name,
13965 utils.CommaJoin(set(res_max + res_min))))
13966 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13968 def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
13973 instance = self.instance
13974 pnode = instance.primary_node
13975 snode = self.op.remote_node
13977 assert instance.disk_template == constants.DT_PLAIN
13979 # create a fake disk info for _GenerateDiskTemplate
13980 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13981 constants.IDISK_VG: d.logical_id[0]}
13982 for d in instance.disks]
13983 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13984 instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
13989 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13990 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13991 info = _GetInstanceInfoText(instance)
13992 feedback_fn("Creating additional volumes...")
13993 # first, create the missing data and meta devices
13994 for disk in anno_disks:
13995 # unfortunately this is... not too nice
13996 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13997 info, True, p_excl_stor)
13998 for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
14003 feedback_fn("Renaming original volumes...")
14004 rename_list = [(o, n.children[0].logical_id)
14005 for (o, n) in zip(instance.disks, new_disks)]
14006 result = self.rpc.call_blockdev_rename(pnode, rename_list)
14007 result.Raise("Failed to rename original LVs")
14009 feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    try:
      for disk in anno_disks:
14013 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
14014 f_create = node == pnode
          _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                                excl_stor)
14017 except errors.GenericError, e:
14018 feedback_fn("Initializing of DRBD devices failed;"
14019 " renaming back original volumes...")
14020 for disk in new_disks:
14021 self.cfg.SetDiskID(disk, pnode)
14022 rename_back_list = [(n.children[0], o.logical_id)
14023 for (n, o) in zip(new_disks, instance.disks)]
14024 result = self.rpc.call_blockdev_rename(pnode, rename_back_list)
      result.Raise("Failed to rename LVs back after error %s" % str(e))
      raise

    # at this point, the instance has been modified
14029 instance.disk_template = constants.DT_DRBD8
14030 instance.disks = new_disks
14031 self.cfg.Update(instance, feedback_fn)
14033 # Release node locks while waiting for sync
14034 _ReleaseLocks(self, locking.LEVEL_NODE)
14036 # disks are created, waiting for sync
14037 disk_abort = not _WaitForSync(self, instance,
14038 oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
14041 " this instance, please cleanup manually")
14043 # Node resource locks will be released by caller
14045 def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
14051 assert len(instance.secondary_nodes) == 1
14052 assert instance.disk_template == constants.DT_DRBD8
14054 pnode = instance.primary_node
14055 snode = instance.secondary_nodes[0]
14056 feedback_fn("Converting template to plain")
14058 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
14059 new_disks = [d.children[0] for d in instance.disks]
14061 # copy over size and mode
14062 for parent, child in zip(old_disks, new_disks):
14063 child.size = parent.size
14064 child.mode = parent.mode
14066 # this is a DRBD disk, return its port to the pool
14067 # NOTE: this must be done right before the call to cfg.Update!
14068 for disk in old_disks:
14069 tcp_port = disk.logical_id[2]
14070 self.cfg.AddTcpUdpPort(tcp_port)
14072 # update instance structure
14073 instance.disks = new_disks
14074 instance.disk_template = constants.DT_PLAIN
14075 _UpdateIvNames(0, instance.disks)
14076 self.cfg.Update(instance, feedback_fn)
14078 # Release locks in case removing disks takes a while
14079 _ReleaseLocks(self, locking.LEVEL_NODE)
14081 feedback_fn("Removing volumes on the secondary node...")
14082 for disk in old_disks:
14083 self.cfg.SetDiskID(disk, snode)
14084 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
14087 " continuing anyway: %s", disk.iv_name, snode, msg)
14089 feedback_fn("Removing unneeded volumes on the primary node...")
14090 for idx, disk in enumerate(old_disks):
14091 meta = disk.children[1]
14092 self.cfg.SetDiskID(meta, pnode)
14093 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
14096 " continuing anyway: %s", idx, pnode, msg)
14098 def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance
14105 if instance.disk_template in constants.DTS_FILEBASED:
14106 (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None
    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14113 instance.primary_node, instance.secondary_nodes,
14114 [params], file_path, file_driver, idx,
14115 self.Log, self.diskparams)[0]
14117 info = _GetInstanceInfoText(instance)
14119 logging.info("Creating volume %s for instance %s",
14120 disk.iv_name, instance.name)
14121 # Note: this needs to be kept in sync with _CreateDisks
14123 for node in instance.all_nodes:
14124 f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14127 except errors.OpExecError, err:
14128 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14129 disk.iv_name, disk, node, err)
    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]
    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]
  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14151 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14152 self.cfg.SetDiskID(disk, node)
14153 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14156 " continuing anyway", idx, node, msg)
14158 # if this is a DRBD disk, return its port to the pool
14159 if root.dev_type in constants.LDS_DRBD:
14160 self.cfg.AddTcpUdpPort(root.logical_id[2])
14162 def _CreateNewNic(self, idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
14167 ip = params.get(constants.INIC_IP, None)
14168 net = params.get(constants.INIC_NETWORK, None)
14169 net_uuid = self.cfg.LookupNetwork(net)
14170 #TODO: not private.filled?? can a nic have no nicparams??
14171 nicparams = private.filled
14172 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
    return (nobj, [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14177 (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK],
        net)),
      ])

  def _ApplyNicMods(self, idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])
14193 new_net = params.get(constants.INIC_NETWORK, nic.network)
14194 new_net_uuid = self.cfg.LookupNetwork(new_net)
14195 if new_net_uuid != nic.network:
14196 changes.append(("nic.network/%d" % idx, new_net))
14197 nic.network = new_net_uuid
    if private.filled:
      nic.nicparams = private.filled
14202 for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes
14207 def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
14214 # feedback_fn there.
14215 # TODO: Replace with self.LogWarning
14216 for warn in self.warn:
14217 feedback_fn("WARNING: %s" % warn)
14219 assert ((self.op.disk_template is None) ^
14220 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14221 "Not owning any node resource locks"
    result = []
    instance = self.instance
14227 if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance.name,
                                                     self.op.runtime_mem)
14231 rpcres.Raise("Cannot modify instance runtime memory")
14232 result.append(("runtime_memory", self.op.runtime_mem))
14234 # Apply disk changes
14235 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14236 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14237 _UpdateIvNames(0, instance.disks)
14239 if self.op.disk_template:
14241 check_nodes = set(instance.all_nodes)
14242 if self.op.remote_node:
14243 check_nodes.add(self.op.remote_node)
14244 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14245 owned = self.owned_locks(level)
14246 assert not (check_nodes - owned), \
14247 ("Not owning the correct locks, owning %r, expected at least %r" %
14248 (owned, check_nodes))
14250 r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14253 " proceed with disk template conversion")
14254 mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

      result.append(("disk_template", self.op.disk_template))
14262 assert instance.disk_template == self.op.disk_template, \
14263 ("Expected disk template '%s', found '%s'" %
14264 (self.op.disk_template, instance.disk_template))
14266 # Release node and resource locks if there are any (they might already have
14267 # been released during disk conversion)
14268 _ReleaseLocks(self, locking.LEVEL_NODE)
14269 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14271 # Apply NIC changes
14272 if self._new_nics is not None:
14273 instance.nics = self._new_nics
14274 result.extend(self._nic_chgdesc)
14277 if self.op.hvparams:
14278 instance.hvparams = self.hv_inst
14279 for key, val in self.op.hvparams.iteritems():
14280 result.append(("hv/%s" % key, val))
14283 if self.op.beparams:
14284 instance.beparams = self.be_inst
14285 for key, val in self.op.beparams.iteritems():
14286 result.append(("be/%s" % key, val))
14289 if self.op.os_name:
14290 instance.os = self.op.os_name
14293 if self.op.osparams:
14294 instance.osparams = self.os_inst
14295 for key, val in self.op.osparams.iteritems():
14296 result.append(("os/%s" % key, val))
    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
14302 # Mark instance as offline
14303 self.cfg.MarkInstanceOffline(instance.name)
14304 result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
14307 self.cfg.MarkInstanceDown(instance.name)
14308 result.append(("admin_state", constants.ADMINST_DOWN))
14310 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14312 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14313 self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
14318 _DISK_CONVERSIONS = {
14319 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
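  # Dispatch note (added for clarity): the conversion routine is selected in
  # Exec() by the (old_template, new_template) pair, e.g.
  #
  #   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)  # -> _ConvertPlainToDrbd
  #
  # Unsupported pairs are rejected earlier, in _PreCheckDiskTemplate.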
14324 class LUInstanceChangeGroup(LogicalUnit):
14325 HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
14329 def ExpandNames(self):
14330 self.share_locks = _ShareAll()
14332 self.needed_locks = {
14333 locking.LEVEL_NODEGROUP: [],
14334 locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()
14340 if self.op.target_groups:
14341 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14342 self.op.target_groups)
    else:
      self.req_target_uuids = None
14346 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14348 def DeclareLocks(self, level):
14349 if level == locking.LEVEL_NODEGROUP:
14350 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14352 if self.req_target_uuids:
14353 lock_groups = set(self.req_target_uuids)
14355 # Lock all groups used by instance optimistically; this requires going
14356 # via the node before it's locked, requiring verification later on
14357 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14358 lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET
14363 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14365 elif level == locking.LEVEL_NODE:
14366 if self.req_target_uuids:
14367 # Lock all nodes used by instances
14368 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14369 self._LockInstancesNodes()
14371 # Lock all nodes in all potential target groups
14372 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14373 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14374 member_nodes = [node_name
14375 for group in lock_groups
14376 for node_name in self.cfg.GetNodeGroup(group).members]
14377 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14378 else:
14379 # Lock all nodes as all groups are potential targets
14380 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14382 def CheckPrereq(self):
14383 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14384 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14385 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14387 assert (self.req_target_uuids is None or
14388 owned_groups.issuperset(self.req_target_uuids))
14389 assert owned_instances == set([self.op.instance_name])
14391 # Get instance information
14392 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14394 # Check if node groups for locked instance are still correct
14395 assert owned_nodes.issuperset(self.instance.all_nodes), \
14396 ("Instance %s's nodes changed while we kept the lock" %
14397 self.op.instance_name)
14399 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14402 if self.req_target_uuids:
14403 # User requested specific target groups
14404 self.target_uuids = frozenset(self.req_target_uuids)
14405 else:
14406 # All groups except those used by the instance are potential targets
14407 self.target_uuids = owned_groups - inst_groups
14409 conflicting_groups = self.target_uuids & inst_groups
14410 if conflicting_groups:
14411 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14412 " used by the instance '%s'" %
14413 (utils.CommaJoin(conflicting_groups),
14414 self.op.instance_name),
14415 errors.ECODE_INVAL)
14417 if not self.target_uuids:
14418 raise errors.OpPrereqError("There are no possible target groups",
14419 errors.ECODE_INVAL)
14421 def BuildHooksEnv(self):
14422 """Build hooks env.
14425 assert self.target_uuids
14428 "TARGET_GROUPS": " ".join(self.target_uuids),
14431 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14435 def BuildHooksNodes(self):
14436 """Build hooks nodes.
14439 mn = self.cfg.GetMasterNode()
14440 return ([mn], [mn])
14442 def Exec(self, feedback_fn):
14443 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14445 assert instances == [self.op.instance_name], "Instance not locked"
14447 req = iallocator.IAReqGroupChange(instances=instances,
14448 target_groups=list(self.target_uuids))
14449 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14451 ial.Run(self.op.iallocator)
14453 if not ial.success:
14454 raise errors.OpPrereqError("Can't compute solution for changing group of"
14455 " instance '%s' using iallocator '%s': %s" %
14456 (self.op.instance_name, self.op.iallocator,
14457 ial.info), errors.ECODE_NORES)
14459 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14461 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14462 " instance '%s'", len(jobs), self.op.instance_name)
14464 return ResultWithJobs(jobs)
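# Illustrative sketch, not part of this LU: how the opcode that drives
# LUInstanceChangeGroup is normally submitted from client code. "cl" is
# assumed to be a luxi client, e.g. the one returned by ganeti.cli.GetClient().
#
#   from ganeti import opcodes
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"])
#   job_id = cl.SubmitJob([op])
#
# The resulting job runs this LU; the ResultWithJobs returned above then makes
# the job processor submit the migration/failover jobs computed by the
# iallocator.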
14467 class LUBackupQuery(NoHooksLU):
14468 """Query the exports list
14473 def CheckArguments(self):
14474 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14475 ["node", "export"], self.op.use_locking)
14477 def ExpandNames(self):
14478 self.expq.ExpandNames(self)
14480 def DeclareLocks(self, level):
14481 self.expq.DeclareLocks(self, level)
14483 def Exec(self, feedback_fn):
14484 result = {}
14486 for (node, expname) in self.expq.OldStyleQuery(self):
14487 if expname is None:
14488 result[node] = False
14489 else:
14490 result.setdefault(node, []).append(expname)
14492 return result
14495 class _ExportQuery(_QueryBase):
14496 FIELDS = query.EXPORT_FIELDS
14498 #: The node name is not a unique key for this query
14499 SORT_FIELD = "node"
14501 def ExpandNames(self, lu):
14502 lu.needed_locks = {}
14504 # The following variables interact with _QueryBase._GetNames
14505 if self.names:
14506 self.wanted = _GetWantedNodes(lu, self.names)
14507 else:
14508 self.wanted = locking.ALL_SET
14510 self.do_locking = self.use_locking
14512 if self.do_locking:
14513 lu.share_locks = _ShareAll()
14514 lu.needed_locks = {
14515 locking.LEVEL_NODE: self.wanted,
14519 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14521 def DeclareLocks(self, lu, level):
14524 def _GetQueryData(self, lu):
14525 """Computes the list of nodes and their attributes.
14528 # Locking is not used
14530 assert not (compat.any(lu.glm.is_owned(level)
14531 for level in locking.LEVELS
14532 if level != locking.LEVEL_CLUSTER) or
14533 self.do_locking or self.use_locking)
14535 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14537 result = []
14539 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14540 if nres.fail_msg:
14541 result.append((node, None))
14542 else:
14543 result.extend((node, expname) for expname in nres.payload)
14545 return result
14548 class LUBackupPrepare(NoHooksLU):
14549 """Prepares an instance for an export and returns useful information.
14554 def ExpandNames(self):
14555 self._ExpandAndLockInstance()
14557 def CheckPrereq(self):
14558 """Check prerequisites.
14561 instance_name = self.op.instance_name
14563 self.instance = self.cfg.GetInstanceInfo(instance_name)
14564 assert self.instance is not None, \
14565 "Cannot retrieve locked instance %s" % self.op.instance_name
14566 _CheckNodeOnline(self, self.instance.primary_node)
14568 self._cds = _GetClusterDomainSecret()
14570 def Exec(self, feedback_fn):
14571 """Prepares an instance for an export.
14574 instance = self.instance
14576 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14577 salt = utils.GenerateSecret(8)
14579 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14580 result = self.rpc.call_x509_cert_create(instance.primary_node,
14581 constants.RIE_CERT_VALIDITY)
14582 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14584 (name, cert_pem) = result.payload
14586 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14590 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14591 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14593 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14599 class LUBackupExport(LogicalUnit):
14600 """Export an instance to an image in the cluster.
14603 HPATH = "instance-export"
14604 HTYPE = constants.HTYPE_INSTANCE
14607 def CheckArguments(self):
14608 """Check the arguments.
14611 self.x509_key_name = self.op.x509_key_name
14612 self.dest_x509_ca_pem = self.op.destination_x509_ca
14614 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14615 if not self.x509_key_name:
14616 raise errors.OpPrereqError("Missing X509 key name for encryption",
14617 errors.ECODE_INVAL)
14619 if not self.dest_x509_ca_pem:
14620 raise errors.OpPrereqError("Missing destination X509 CA",
14621 errors.ECODE_INVAL)
14623 def ExpandNames(self):
14624 self._ExpandAndLockInstance()
14626 # Lock all nodes for local exports
14627 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14628 # FIXME: lock only instance primary and destination node
14630 # Sad but true, for now we have to lock all nodes, as we don't know where
14631 # the previous export might be, and in this LU we search for it and
14632 # remove it from its current node. In the future we could fix this by:
14633 # - making a tasklet to search (share-lock all), then create the
14634 # new one, then one to remove, after
14635 # - removing the removal operation altogether
14636 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14638 # Allocations should be stopped while this LU runs with node locks, but
14639 # it doesn't have to be exclusive
14640 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14641 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14643 def DeclareLocks(self, level):
14644 """Last minute lock declaration."""
14645 # All nodes are locked anyway, so nothing to do here.
14647 def BuildHooksEnv(self):
14648 """Build hooks env.
14650 This will run on the master, primary node and target node.
14654 "EXPORT_MODE": self.op.mode,
14655 "EXPORT_NODE": self.op.target_node,
14656 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14657 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14658 # TODO: Generic function for boolean env variables
14659 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14662 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14666 def BuildHooksNodes(self):
14667 """Build hooks nodes.
14670 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14672 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14673 nl.append(self.op.target_node)
14675 return nl
14677 def CheckPrereq(self):
14678 """Check prerequisites.
14680 This checks that the instance and node names are valid.
14683 instance_name = self.op.instance_name
14685 self.instance = self.cfg.GetInstanceInfo(instance_name)
14686 assert self.instance is not None, \
14687 "Cannot retrieve locked instance %s" % self.op.instance_name
14688 _CheckNodeOnline(self, self.instance.primary_node)
14690 if (self.op.remove_instance and
14691 self.instance.admin_state == constants.ADMINST_UP and
14692 not self.op.shutdown):
14693 raise errors.OpPrereqError("Can not remove instance without shutting it"
14694 " down before", errors.ECODE_STATE)
14696 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14697 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14698 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14699 assert self.dst_node is not None
14701 _CheckNodeOnline(self, self.dst_node.name)
14702 _CheckNodeNotDrained(self, self.dst_node.name)
14705 self.dest_disk_info = None
14706 self.dest_x509_ca = None
14708 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14709 self.dst_node = None
14711 if len(self.op.target_node) != len(self.instance.disks):
14712 raise errors.OpPrereqError(("Received destination information for %s"
14713 " disks, but instance %s has %s disks") %
14714 (len(self.op.target_node), instance_name,
14715 len(self.instance.disks)),
14716 errors.ECODE_INVAL)
14718 cds = _GetClusterDomainSecret()
14720 # Check X509 key name
14721 try:
14722 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14723 except (TypeError, ValueError), err:
14724 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14725 errors.ECODE_INVAL)
14727 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14728 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14729 errors.ECODE_INVAL)
14731 # Load and verify CA
14732 try:
14733 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14734 except OpenSSL.crypto.Error, err:
14735 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14736 (err, ), errors.ECODE_INVAL)
14738 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14739 if errcode is not None:
14740 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14741 (msg, ), errors.ECODE_INVAL)
14743 self.dest_x509_ca = cert
14745 # Verify target information
14746 disk_info = []
14747 for idx, disk_data in enumerate(self.op.target_node):
14748 try:
14749 (host, port, magic) = \
14750 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14751 except errors.GenericError, err:
14752 raise errors.OpPrereqError("Target info for disk %s: %s" %
14753 (idx, err), errors.ECODE_INVAL)
14755 disk_info.append((host, port, magic))
14757 assert len(disk_info) == len(self.op.target_node)
14758 self.dest_disk_info = disk_info
14760 else:
14761 raise errors.ProgrammerError("Unhandled export mode %r" %
14762 self.op.mode)
14764 # instance disk type verification
14765 # TODO: Implement export support for file-based disks
14766 for disk in self.instance.disks:
14767 if disk.dev_type == constants.LD_FILE:
14768 raise errors.OpPrereqError("Export not supported for instances with"
14769 " file-based disks", errors.ECODE_INVAL)
14771 def _CleanupExports(self, feedback_fn):
14772 """Removes exports of current instance from all other nodes.
14774 If an instance in a cluster with nodes A..D was exported to node C, its
14775 exports will be removed from the nodes A, B and D.
14778 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14780 nodelist = self.cfg.GetNodeList()
14781 nodelist.remove(self.dst_node.name)
14783 # on one-node clusters nodelist will be empty after the removal
14784 # if we proceed the backup would be removed because OpBackupQuery
14785 # substitutes an empty list with the full cluster node list.
14786 iname = self.instance.name
14788 feedback_fn("Removing old exports for instance %s" % iname)
14789 exportlist = self.rpc.call_export_list(nodelist)
14790 for node in exportlist:
14791 if exportlist[node].fail_msg:
14792 continue
14793 if iname in exportlist[node].payload:
14794 msg = self.rpc.call_export_remove(node, iname).fail_msg
14795 if msg:
14796 self.LogWarning("Could not remove older export for instance %s"
14797 " on node %s: %s", iname, node, msg)
14799 def Exec(self, feedback_fn):
14800 """Export an instance to an image in the cluster.
14803 assert self.op.mode in constants.EXPORT_MODES
14805 instance = self.instance
14806 src_node = instance.primary_node
14808 if self.op.shutdown:
14809 # shutdown the instance, but not the disks
14810 feedback_fn("Shutting down instance %s" % instance.name)
14811 result = self.rpc.call_instance_shutdown(src_node, instance,
14812 self.op.shutdown_timeout)
14813 # TODO: Maybe ignore failures if ignore_remove_failures is set
14814 result.Raise("Could not shutdown instance %s on"
14815 " node %s" % (instance.name, src_node))
14817 # set the disks ID correctly since call_instance_start needs the
14818 # correct drbd minor to create the symlinks
14819 for disk in instance.disks:
14820 self.cfg.SetDiskID(disk, src_node)
14822 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14824 if activate_disks:
14825 # Activate the instance disks if we're exporting a stopped instance
14826 feedback_fn("Activating disks for %s" % instance.name)
14827 _StartInstanceDisks(self, instance, None)
14830 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14833 helper.CreateSnapshots()
14835 if (self.op.shutdown and
14836 instance.admin_state == constants.ADMINST_UP and
14837 not self.op.remove_instance):
14838 assert not activate_disks
14839 feedback_fn("Starting instance %s" % instance.name)
14840 result = self.rpc.call_instance_start(src_node,
14841 (instance, None, None), False)
14842 msg = result.fail_msg
14844 feedback_fn("Failed to start instance: %s" % msg)
14845 _ShutdownInstanceDisks(self, instance)
14846 raise errors.OpExecError("Could not start instance: %s" % msg)
14848 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14849 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14850 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14851 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14852 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14854 (key_name, _, _) = self.x509_key_name
14856 dest_ca_pem = \
14857 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14858 self.dest_x509_ca)
14860 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14861 key_name, dest_ca_pem,
14866 # Check for backwards compatibility
14867 assert len(dresults) == len(instance.disks)
14868 assert compat.all(isinstance(i, bool) for i in dresults), \
14869 "Not all results are boolean: %r" % dresults
14873 feedback_fn("Deactivating disks for %s" % instance.name)
14874 _ShutdownInstanceDisks(self, instance)
14876 if not (compat.all(dresults) and fin_resu):
14877 failures = []
14878 if not fin_resu:
14879 failures.append("export finalization")
14880 if not compat.all(dresults):
14881 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14882 if not dsk)
14883 failures.append("disk export: disk(s) %s" % fdsk)
14885 raise errors.OpExecError("Export failed, errors in %s" %
14886 utils.CommaJoin(failures))
14888 # At this point, the export was successful, we can cleanup/finish
14890 # Remove instance if requested
14891 if self.op.remove_instance:
14892 feedback_fn("Removing instance %s" % instance.name)
14893 _RemoveInstance(self, feedback_fn, instance,
14894 self.op.ignore_remove_failures)
14896 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14897 self._CleanupExports(feedback_fn)
14899 return fin_resu, dresults
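# Shape of the value returned above (sketch): "fin_resu" reports the export
# finalization and "dresults" holds one boolean per instance disk, e.g.
#
#   (fin_resu, dresults) = (True, [True, True])    # two disks, all exported
#
# Any False value would already have raised OpExecError earlier in Exec.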
14902 class LUBackupRemove(NoHooksLU):
14903 """Remove exports related to the named instance.
14908 def ExpandNames(self):
14909 self.needed_locks = {
14910 # We need all nodes to be locked in order for RemoveExport to work, but
14911 # we don't need to lock the instance itself, as nothing will happen to it
14912 # (and we can remove exports also for a removed instance)
14913 locking.LEVEL_NODE: locking.ALL_SET,
14915 # Removing backups is quick, so blocking allocations is justified
14916 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14919 # Allocations should be stopped while this LU runs with node locks, but it
14920 # doesn't have to be exclusive
14921 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14923 def Exec(self, feedback_fn):
14924 """Remove any export.
14927 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14928 # If the instance was not found we'll try with the name that was passed in.
14929 # This will only work if it was an FQDN, though.
14930 fqdn_warn = False
14931 if not instance_name:
14932 fqdn_warn = True
14933 instance_name = self.op.instance_name
14935 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14936 exportlist = self.rpc.call_export_list(locked_nodes)
14937 found = False
14938 for node in exportlist:
14939 msg = exportlist[node].fail_msg
14940 if msg:
14941 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14942 continue
14943 if instance_name in exportlist[node].payload:
14944 found = True
14945 result = self.rpc.call_export_remove(node, instance_name)
14946 msg = result.fail_msg
14947 if msg:
14948 logging.error("Could not remove export for instance %s"
14949 " on node %s: %s", instance_name, node, msg)
14951 if fqdn_warn and not found:
14952 feedback_fn("Export not found. If trying to remove an export belonging"
14953 " to a deleted instance please use its Fully Qualified"
14957 class LUGroupAdd(LogicalUnit):
14958 """Logical unit for creating node groups.
14961 HPATH = "group-add"
14962 HTYPE = constants.HTYPE_GROUP
14965 def ExpandNames(self):
14966 # We need the new group's UUID here so that we can create and acquire the
14967 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14968 # that it should not check whether the UUID exists in the configuration.
14969 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14970 self.needed_locks = {}
14971 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14973 def CheckPrereq(self):
14974 """Check prerequisites.
14976 This checks that the given group name is not an existing node group
14980 try:
14981 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14982 except errors.OpPrereqError:
14983 pass
14984 else:
14985 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14986 " node group (UUID: %s)" %
14987 (self.op.group_name, existing_uuid),
14988 errors.ECODE_EXISTS)
14990 if self.op.ndparams:
14991 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14993 if self.op.hv_state:
14994 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14995 else:
14996 self.new_hv_state = None
14998 if self.op.disk_state:
14999 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
15000 else:
15001 self.new_disk_state = None
15003 if self.op.diskparams:
15004 for templ in constants.DISK_TEMPLATES:
15005 if templ in self.op.diskparams:
15006 utils.ForceDictType(self.op.diskparams[templ],
15007 constants.DISK_DT_TYPES)
15008 self.new_diskparams = self.op.diskparams
15009 try:
15010 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15011 except errors.OpPrereqError, err:
15012 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15013 errors.ECODE_INVAL)
15014 else:
15015 self.new_diskparams = {}
15017 if self.op.ipolicy:
15018 cluster = self.cfg.GetClusterInfo()
15019 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
15021 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
15022 except errors.ConfigurationError, err:
15023 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
15024 errors.ECODE_INVAL)
15026 def BuildHooksEnv(self):
15027 """Build hooks env.
15031 "GROUP_NAME": self.op.group_name,
15034 def BuildHooksNodes(self):
15035 """Build hooks nodes.
15038 mn = self.cfg.GetMasterNode()
15039 return ([mn], [mn])
15041 def Exec(self, feedback_fn):
15042 """Add the node group to the cluster.
15045 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
15046 uuid=self.group_uuid,
15047 alloc_policy=self.op.alloc_policy,
15048 ndparams=self.op.ndparams,
15049 diskparams=self.new_diskparams,
15050 ipolicy=self.op.ipolicy,
15051 hv_state_static=self.new_hv_state,
15052 disk_state_static=self.new_disk_state)
15054 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
15055 del self.remove_locks[locking.LEVEL_NODEGROUP]
15058 class LUGroupAssignNodes(NoHooksLU):
15059 """Logical unit for assigning nodes to groups.
15064 def ExpandNames(self):
15065 # These raise errors.OpPrereqError on their own:
15066 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15067 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15069 # We want to lock all the affected nodes and groups. We have readily
15070 # available the list of nodes, and the *destination* group. To gather the
15071 # list of "source" groups, we need to fetch node information later on.
15072 self.needed_locks = {
15073 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15074 locking.LEVEL_NODE: self.op.nodes,
15077 def DeclareLocks(self, level):
15078 if level == locking.LEVEL_NODEGROUP:
15079 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15081 # Try to get all affected nodes' groups without having the group or node
15082 # lock yet. Needs verification later in the code flow.
15083 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15085 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15087 def CheckPrereq(self):
15088 """Check prerequisites.
15091 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15092 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15093 frozenset(self.op.nodes))
15095 expected_locks = (set([self.group_uuid]) |
15096 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15097 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15098 if actual_locks != expected_locks:
15099 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15100 " current groups are '%s', used to be '%s'" %
15101 (utils.CommaJoin(expected_locks),
15102 utils.CommaJoin(actual_locks)))
15104 self.node_data = self.cfg.GetAllNodesInfo()
15105 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15106 instance_data = self.cfg.GetAllInstancesInfo()
15108 if self.group is None:
15109 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15110 (self.op.group_name, self.group_uuid))
15112 (new_splits, previous_splits) = \
15113 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15114 for node in self.op.nodes],
15115 self.node_data, instance_data)
15117 if new_splits:
15118 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15120 if not self.op.force:
15121 raise errors.OpExecError("The following instances get split by this"
15122 " change and --force was not given: %s" %
15123 fmt_new_splits)
15124 else:
15125 self.LogWarning("This operation will split the following instances: %s",
15126 fmt_new_splits)
15128 if previous_splits:
15129 self.LogWarning("In addition, these already-split instances continue"
15130 " to be split across groups: %s",
15131 utils.CommaJoin(utils.NiceSort(previous_splits)))
15133 def Exec(self, feedback_fn):
15134 """Assign nodes to a new group.
15137 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15139 self.cfg.AssignGroupNodes(mods)
15141 @staticmethod
15142 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15143 """Check for split instances after a node assignment.
15145 This method considers a series of node assignments as an atomic operation,
15146 and returns information about split instances after applying the set of
15149 In particular, it returns information about newly split instances, and
15150 instances that were already split, and remain so after the change.
15152 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15155 @type changes: list of (node_name, new_group_uuid) pairs.
15156 @param changes: list of node assignments to consider.
15157 @param node_data: a dict with data for all nodes
15158 @param instance_data: a dict with all instances to consider
15159 @rtype: a two-tuple
15160 @return: a list of instances that were previously okay and result split as a
15161 consequence of this change, and a list of instances that were previously
15162 split and this change does not fix.
15165 changed_nodes = dict((node, group) for node, group in changes
15166 if node_data[node].group != group)
15168 all_split_instances = set()
15169 previously_split_instances = set()
15171 def InstanceNodes(instance):
15172 return [instance.primary_node] + list(instance.secondary_nodes)
15174 for inst in instance_data.values():
15175 if inst.disk_template not in constants.DTS_INT_MIRROR:
15178 instance_nodes = InstanceNodes(inst)
15180 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15181 previously_split_instances.add(inst.name)
15183 if len(set(changed_nodes.get(node, node_data[node].group)
15184 for node in instance_nodes)) > 1:
15185 all_split_instances.add(inst.name)
15187 return (list(all_split_instances - previously_split_instances),
15188 list(previously_split_instances & all_split_instances))
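# Worked example for CheckAssignmentForSplitInstances (sketch; the namedtuples
# are stand-ins for the real node/instance objects and only carry the
# attributes the function reads):
#
#   from collections import namedtuple
#   Node = namedtuple("Node", ["group"])
#   Inst = namedtuple("Inst", ["name", "disk_template", "primary_node",
#                              "secondary_nodes"])
#   node_data = {"n1": Node("g1"), "n2": Node("g1"), "n3": Node("g2")}
#   instance_data = {
#     "web": Inst("web", constants.DT_DRBD8, "n1", ["n2"]),  # both in g1
#     "db": Inst("db", constants.DT_DRBD8, "n1", ["n3"]),    # already split
#     }
#   # Moving n2 into g2 newly splits "web", while "db" simply stays split:
#   LUGroupAssignNodes.CheckAssignmentForSplitInstances(
#     [("n2", "g2")], node_data, instance_data)
#   # => (["web"], ["db"])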
15191 class _GroupQuery(_QueryBase):
15192 FIELDS = query.GROUP_FIELDS
15194 def ExpandNames(self, lu):
15195 lu.needed_locks = {}
15197 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15198 self._cluster = lu.cfg.GetClusterInfo()
15199 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15202 self.wanted = [name_to_uuid[name]
15203 for name in utils.NiceSort(name_to_uuid.keys())]
15205 # Accept names to be either names or UUIDs.
15208 all_uuid = frozenset(self._all_groups.keys())
15210 for name in self.names:
15211 if name in all_uuid:
15212 self.wanted.append(name)
15213 elif name in name_to_uuid:
15214 self.wanted.append(name_to_uuid[name])
15216 missing.append(name)
15219 raise errors.OpPrereqError("Some groups do not exist: %s" %
15220 utils.CommaJoin(missing),
15221 errors.ECODE_NOENT)
15223 def DeclareLocks(self, lu, level):
15226 def _GetQueryData(self, lu):
15227 """Computes the list of node groups and their attributes.
15230 do_nodes = query.GQ_NODE in self.requested_data
15231 do_instances = query.GQ_INST in self.requested_data
15233 group_to_nodes = None
15234 group_to_instances = None
15236 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15237 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15238 # latter GetAllInstancesInfo() is not enough, for we have to go through
15239 # instance->node. Hence, we will need to process nodes even if we only need
15240 # instance information.
15241 if do_nodes or do_instances:
15242 all_nodes = lu.cfg.GetAllNodesInfo()
15243 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15246 for node in all_nodes.values():
15247 if node.group in group_to_nodes:
15248 group_to_nodes[node.group].append(node.name)
15249 node_to_group[node.name] = node.group
15252 all_instances = lu.cfg.GetAllInstancesInfo()
15253 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15255 for instance in all_instances.values():
15256 node = instance.primary_node
15257 if node in node_to_group:
15258 group_to_instances[node_to_group[node]].append(instance.name)
15261 # Do not pass on node information if it was not requested.
15262 group_to_nodes = None
15264 return query.GroupQueryData(self._cluster,
15265 [self._all_groups[uuid]
15266 for uuid in self.wanted],
15267 group_to_nodes, group_to_instances,
15268 query.GQ_DISKPARAMS in self.requested_data)
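# Sketch of the mappings built above, with made-up names: if node "n1" is in
# group "g1" and "n2" in "g2", and instance "web" has its primary on "n1"
# while "db" has its primary on "n2", then
#
#   group_to_nodes = {"g1": ["n1"], "g2": ["n2"]}
#   group_to_instances = {"g1": ["web"], "g2": ["db"]}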
15271 class LUGroupQuery(NoHooksLU):
15272 """Logical unit for querying node groups.
15277 def CheckArguments(self):
15278 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15279 self.op.output_fields, False)
15281 def ExpandNames(self):
15282 self.gq.ExpandNames(self)
15284 def DeclareLocks(self, level):
15285 self.gq.DeclareLocks(self, level)
15287 def Exec(self, feedback_fn):
15288 return self.gq.OldStyleQuery(self)
15291 class LUGroupSetParams(LogicalUnit):
15292 """Modifies the parameters of a node group.
15295 HPATH = "group-modify"
15296 HTYPE = constants.HTYPE_GROUP
15299 def CheckArguments(self):
15302 self.op.diskparams,
15303 self.op.alloc_policy,
15305 self.op.disk_state,
15309 if all_changes.count(None) == len(all_changes):
15310 raise errors.OpPrereqError("Please pass at least one modification",
15311 errors.ECODE_INVAL)
15313 def ExpandNames(self):
15314 # This raises errors.OpPrereqError on its own:
15315 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15317 self.needed_locks = {
15318 locking.LEVEL_INSTANCE: [],
15319 locking.LEVEL_NODEGROUP: [self.group_uuid],
15322 self.share_locks[locking.LEVEL_INSTANCE] = 1
15324 def DeclareLocks(self, level):
15325 if level == locking.LEVEL_INSTANCE:
15326 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15328 # Lock instances optimistically, needs verification once group lock has
15330 self.needed_locks[locking.LEVEL_INSTANCE] = \
15331 self.cfg.GetNodeGroupInstances(self.group_uuid)
15333 @staticmethod
15334 def _UpdateAndVerifyDiskParams(old, new):
15335 """Updates and verifies disk parameters.
15338 new_params = _GetUpdatedParams(old, new)
15339 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15340 return new_params
15342 def CheckPrereq(self):
15343 """Check prerequisites.
15346 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15348 # Check if locked instances are still correct
15349 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15351 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15352 cluster = self.cfg.GetClusterInfo()
15354 if self.group is None:
15355 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15356 (self.op.group_name, self.group_uuid))
15358 if self.op.ndparams:
15359 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15360 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15361 self.new_ndparams = new_ndparams
15363 if self.op.diskparams:
15364 diskparams = self.group.diskparams
15365 uavdp = self._UpdateAndVerifyDiskParams
15366 # For each disktemplate subdict update and verify the values
15367 new_diskparams = dict((dt,
15368 uavdp(diskparams.get(dt, {}),
15369 self.op.diskparams[dt]))
15370 for dt in constants.DISK_TEMPLATES
15371 if dt in self.op.diskparams)
15372 # Now that we have all subdicts of diskparams ready, let's merge the
15373 # actual dict with all updated subdicts
15374 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15375 try:
15376 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15377 except errors.OpPrereqError, err:
15378 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15379 errors.ECODE_INVAL)
15381 if self.op.hv_state:
15382 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15383 self.group.hv_state_static)
15385 if self.op.disk_state:
15386 self.new_disk_state = \
15387 _MergeAndVerifyDiskState(self.op.disk_state,
15388 self.group.disk_state_static)
15390 if self.op.ipolicy:
15391 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15395 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15396 inst_filter = lambda inst: inst.name in owned_instances
15397 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15398 gmi = ganeti.masterd.instance
15400 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15402 new_ipolicy, instances, self.cfg)
15405 self.LogWarning("After the ipolicy change the following instances"
15406 " violate them: %s",
15407 utils.CommaJoin(violations))
15409 def BuildHooksEnv(self):
15410 """Build hooks env.
15414 "GROUP_NAME": self.op.group_name,
15415 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15418 def BuildHooksNodes(self):
15419 """Build hooks nodes.
15422 mn = self.cfg.GetMasterNode()
15423 return ([mn], [mn])
15425 def Exec(self, feedback_fn):
15426 """Modifies the node group.
15431 if self.op.ndparams:
15432 self.group.ndparams = self.new_ndparams
15433 result.append(("ndparams", str(self.group.ndparams)))
15435 if self.op.diskparams:
15436 self.group.diskparams = self.new_diskparams
15437 result.append(("diskparams", str(self.group.diskparams)))
15439 if self.op.alloc_policy:
15440 self.group.alloc_policy = self.op.alloc_policy
15442 if self.op.hv_state:
15443 self.group.hv_state_static = self.new_hv_state
15445 if self.op.disk_state:
15446 self.group.disk_state_static = self.new_disk_state
15448 if self.op.ipolicy:
15449 self.group.ipolicy = self.new_ipolicy
15451 self.cfg.Update(self.group, feedback_fn)
15455 class LUGroupRemove(LogicalUnit):
15456 HPATH = "group-remove"
15457 HTYPE = constants.HTYPE_GROUP
15460 def ExpandNames(self):
15461 # This raises errors.OpPrereqError on its own:
15462 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15463 self.needed_locks = {
15464 locking.LEVEL_NODEGROUP: [self.group_uuid],
15467 def CheckPrereq(self):
15468 """Check prerequisites.
15470 This checks that the given group name exists as a node group, that it is
15471 empty (i.e., contains no nodes), and that it is not the last group of the
15472 cluster.
15475 # Verify that the group is empty.
15476 group_nodes = [node.name
15477 for node in self.cfg.GetAllNodesInfo().values()
15478 if node.group == self.group_uuid]
15481 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15483 (self.op.group_name,
15484 utils.CommaJoin(utils.NiceSort(group_nodes))),
15485 errors.ECODE_STATE)
15487 # Verify the cluster would not be left group-less.
15488 if len(self.cfg.GetNodeGroupList()) == 1:
15489 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15490 " removed" % self.op.group_name,
15491 errors.ECODE_STATE)
15493 def BuildHooksEnv(self):
15494 """Build hooks env.
15498 "GROUP_NAME": self.op.group_name,
15501 def BuildHooksNodes(self):
15502 """Build hooks nodes.
15505 mn = self.cfg.GetMasterNode()
15506 return ([mn], [mn])
15508 def Exec(self, feedback_fn):
15509 """Remove the node group.
15513 self.cfg.RemoveNodeGroup(self.group_uuid)
15514 except errors.ConfigurationError:
15515 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15516 (self.op.group_name, self.group_uuid))
15518 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15521 class LUGroupRename(LogicalUnit):
15522 HPATH = "group-rename"
15523 HTYPE = constants.HTYPE_GROUP
15526 def ExpandNames(self):
15527 # This raises errors.OpPrereqError on its own:
15528 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15530 self.needed_locks = {
15531 locking.LEVEL_NODEGROUP: [self.group_uuid],
15534 def CheckPrereq(self):
15535 """Check prerequisites.
15537 Ensures requested new name is not yet used.
15540 try:
15541 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15542 except errors.OpPrereqError:
15543 pass
15544 else:
15545 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15546 " node group (UUID: %s)" %
15547 (self.op.new_name, new_name_uuid),
15548 errors.ECODE_EXISTS)
15550 def BuildHooksEnv(self):
15551 """Build hooks env.
15555 "OLD_NAME": self.op.group_name,
15556 "NEW_NAME": self.op.new_name,
15559 def BuildHooksNodes(self):
15560 """Build hooks nodes.
15563 mn = self.cfg.GetMasterNode()
15565 all_nodes = self.cfg.GetAllNodesInfo()
15566 all_nodes.pop(mn, None)
15568 run_nodes = [mn]
15569 run_nodes.extend(node.name for node in all_nodes.values()
15570 if node.group == self.group_uuid)
15572 return (run_nodes, run_nodes)
15574 def Exec(self, feedback_fn):
15575 """Rename the node group.
15578 group = self.cfg.GetNodeGroup(self.group_uuid)
15580 if group is None:
15581 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15582 (self.op.group_name, self.group_uuid))
15584 group.name = self.op.new_name
15585 self.cfg.Update(group, feedback_fn)
15587 return self.op.new_name
15590 class LUGroupEvacuate(LogicalUnit):
15591 HPATH = "group-evacuate"
15592 HTYPE = constants.HTYPE_GROUP
15595 def ExpandNames(self):
15596 # This raises errors.OpPrereqError on its own:
15597 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15599 if self.op.target_groups:
15600 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15601 self.op.target_groups)
15603 self.req_target_uuids = []
15605 if self.group_uuid in self.req_target_uuids:
15606 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15607 " as a target group (targets are %s)" %
15609 utils.CommaJoin(self.req_target_uuids)),
15610 errors.ECODE_INVAL)
15612 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15614 self.share_locks = _ShareAll()
15615 self.needed_locks = {
15616 locking.LEVEL_INSTANCE: [],
15617 locking.LEVEL_NODEGROUP: [],
15618 locking.LEVEL_NODE: [],
15621 def DeclareLocks(self, level):
15622 if level == locking.LEVEL_INSTANCE:
15623 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15625 # Lock instances optimistically, needs verification once node and group
15626 # locks have been acquired
15627 self.needed_locks[locking.LEVEL_INSTANCE] = \
15628 self.cfg.GetNodeGroupInstances(self.group_uuid)
15630 elif level == locking.LEVEL_NODEGROUP:
15631 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15633 if self.req_target_uuids:
15634 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15636 # Lock all groups used by instances optimistically; this requires going
15637 # via the node before it's locked, requiring verification later on
15638 lock_groups.update(group_uuid
15639 for instance_name in
15640 self.owned_locks(locking.LEVEL_INSTANCE)
15642 self.cfg.GetInstanceNodeGroups(instance_name))
15644 # No target groups, need to lock all of them
15645 lock_groups = locking.ALL_SET
15647 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15649 elif level == locking.LEVEL_NODE:
15650 # This will only lock the nodes in the group to be evacuated which
15651 # contain actual instances
15652 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15653 self._LockInstancesNodes()
15655 # Lock all nodes in group to be evacuated and target groups
15656 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15657 assert self.group_uuid in owned_groups
15658 member_nodes = [node_name
15659 for group in owned_groups
15660 for node_name in self.cfg.GetNodeGroup(group).members]
15661 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15663 def CheckPrereq(self):
15664 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15665 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15666 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15668 assert owned_groups.issuperset(self.req_target_uuids)
15669 assert self.group_uuid in owned_groups
15671 # Check if locked instances are still correct
15672 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15674 # Get instance information
15675 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15677 # Check if node groups for locked instances are still correct
15678 _CheckInstancesNodeGroups(self.cfg, self.instances,
15679 owned_groups, owned_nodes, self.group_uuid)
15681 if self.req_target_uuids:
15682 # User requested specific target groups
15683 self.target_uuids = self.req_target_uuids
15685 # All groups except the one to be evacuated are potential targets
15686 self.target_uuids = [group_uuid for group_uuid in owned_groups
15687 if group_uuid != self.group_uuid]
15689 if not self.target_uuids:
15690 raise errors.OpPrereqError("There are no possible target groups",
15691 errors.ECODE_INVAL)
15693 def BuildHooksEnv(self):
15694 """Build hooks env.
15698 "GROUP_NAME": self.op.group_name,
15699 "TARGET_GROUPS": " ".join(self.target_uuids),
15702 def BuildHooksNodes(self):
15703 """Build hooks nodes.
15706 mn = self.cfg.GetMasterNode()
15708 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15710 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15712 return (run_nodes, run_nodes)
15714 def Exec(self, feedback_fn):
15715 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15717 assert self.group_uuid not in self.target_uuids
15719 req = iallocator.IAReqGroupChange(instances=instances,
15720 target_groups=self.target_uuids)
15721 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15723 ial.Run(self.op.iallocator)
15725 if not ial.success:
15726 raise errors.OpPrereqError("Can't compute group evacuation using"
15727 " iallocator '%s': %s" %
15728 (self.op.iallocator, ial.info),
15729 errors.ECODE_NORES)
15731 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15733 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15734 len(jobs), self.op.group_name)
15736 return ResultWithJobs(jobs)
15739 class TagsLU(NoHooksLU): # pylint: disable=W0223
15740 """Generic tags LU.
15742 This is an abstract class which is the parent of all the other tags LUs.
15745 def ExpandNames(self):
15746 self.group_uuid = None
15747 self.needed_locks = {}
15749 if self.op.kind == constants.TAG_NODE:
15750 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15751 lock_level = locking.LEVEL_NODE
15752 lock_name = self.op.name
15753 elif self.op.kind == constants.TAG_INSTANCE:
15754 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15755 lock_level = locking.LEVEL_INSTANCE
15756 lock_name = self.op.name
15757 elif self.op.kind == constants.TAG_NODEGROUP:
15758 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15759 lock_level = locking.LEVEL_NODEGROUP
15760 lock_name = self.group_uuid
15761 elif self.op.kind == constants.TAG_NETWORK:
15762 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15763 lock_level = locking.LEVEL_NETWORK
15764 lock_name = self.network_uuid
15765 else:
15766 lock_level = None
15767 lock_name = None
15769 if lock_level and getattr(self.op, "use_locking", True):
15770 self.needed_locks[lock_level] = lock_name
15772 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15773 # not possible to acquire the BGL based on opcode parameters)
15775 def CheckPrereq(self):
15776 """Check prerequisites.
15779 if self.op.kind == constants.TAG_CLUSTER:
15780 self.target = self.cfg.GetClusterInfo()
15781 elif self.op.kind == constants.TAG_NODE:
15782 self.target = self.cfg.GetNodeInfo(self.op.name)
15783 elif self.op.kind == constants.TAG_INSTANCE:
15784 self.target = self.cfg.GetInstanceInfo(self.op.name)
15785 elif self.op.kind == constants.TAG_NODEGROUP:
15786 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15787 elif self.op.kind == constants.TAG_NETWORK:
15788 self.target = self.cfg.GetNetwork(self.network_uuid)
15790 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15791 str(self.op.kind), errors.ECODE_INVAL)
15794 class LUTagsGet(TagsLU):
15795 """Returns the tags of a given object.
15800 def ExpandNames(self):
15801 TagsLU.ExpandNames(self)
15803 # Share locks as this is only a read operation
15804 self.share_locks = _ShareAll()
15806 def Exec(self, feedback_fn):
15807 """Returns the tag list.
15810 return list(self.target.GetTags())
15813 class LUTagsSearch(NoHooksLU):
15814 """Searches the tags for a given pattern.
15819 def ExpandNames(self):
15820 self.needed_locks = {}
15822 def CheckPrereq(self):
15823 """Check prerequisites.
15825 This checks the pattern passed for validity by compiling it.
15828 try:
15829 self.re = re.compile(self.op.pattern)
15830 except re.error, err:
15831 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15832 (self.op.pattern, err), errors.ECODE_INVAL)
15834 def Exec(self, feedback_fn):
15835 """Returns the tag list.
15838 cfg = self.cfg
15839 tgts = [("/cluster", cfg.GetClusterInfo())]
15840 ilist = cfg.GetAllInstancesInfo().values()
15841 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15842 nlist = cfg.GetAllNodesInfo().values()
15843 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15844 tgts.extend(("/nodegroup/%s" % n.name, n)
15845 for n in cfg.GetAllNodeGroupsInfo().values())
15846 results = []
15847 for path, target in tgts:
15848 for tag in target.GetTags():
15849 if self.re.search(tag):
15850 results.append((path, tag))
15852 return results
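# Example of the result shape (sketch, with made-up tags): searching for the
# pattern "^env:" could return
#
#   [("/cluster", "env:prod"),
#    ("/instances/web1.example.com", "env:prod")]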
15854 class LUTagsSet(TagsLU):
15855 """Sets a tag on a given object.
15860 def CheckPrereq(self):
15861 """Check prerequisites.
15863 This checks the type and length of the tag name and value.
15866 TagsLU.CheckPrereq(self)
15867 for tag in self.op.tags:
15868 objects.TaggableObject.ValidateTag(tag)
15870 def Exec(self, feedback_fn):
15874 try:
15875 for tag in self.op.tags:
15876 self.target.AddTag(tag)
15877 except errors.TagError, err:
15878 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15879 self.cfg.Update(self.target, feedback_fn)
15882 class LUTagsDel(TagsLU):
15883 """Delete a list of tags from a given object.
15888 def CheckPrereq(self):
15889 """Check prerequisites.
15891 This checks that we have the given tag.
15894 TagsLU.CheckPrereq(self)
15895 for tag in self.op.tags:
15896 objects.TaggableObject.ValidateTag(tag)
15897 del_tags = frozenset(self.op.tags)
15898 cur_tags = self.target.GetTags()
15900 diff_tags = del_tags - cur_tags
15901 if diff_tags:
15902 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15903 raise errors.OpPrereqError("Tag(s) %s not found" %
15904 (utils.CommaJoin(diff_names), ),
15905 errors.ECODE_NOENT)
15907 def Exec(self, feedback_fn):
15908 """Remove the tag from the object.
15911 for tag in self.op.tags:
15912 self.target.RemoveTag(tag)
15913 self.cfg.Update(self.target, feedback_fn)
15916 class LUTestDelay(NoHooksLU):
15917 """Sleep for a specified amount of time.
15919 This LU sleeps on the master and/or nodes for a specified amount of
15925 def ExpandNames(self):
15926 """Expand names and set required locks.
15928 This expands the node list, if any.
15931 self.needed_locks = {}
15932 if self.op.on_nodes:
15933 # _GetWantedNodes can be used here, but is not always appropriate to use
15934 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15935 # more information.
15936 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15937 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15939 def _TestDelay(self):
15940 """Do the actual sleep.
15943 if self.op.on_master:
15944 if not utils.TestDelay(self.op.duration):
15945 raise errors.OpExecError("Error during master delay test")
15946 if self.op.on_nodes:
15947 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15948 for node, node_result in result.items():
15949 node_result.Raise("Failure during rpc call to node %s" % node)
15951 def Exec(self, feedback_fn):
15952 """Execute the test delay opcode, with the wanted repetitions.
15955 if self.op.repeat == 0:
15956 self._TestDelay()
15957 else:
15958 top_value = self.op.repeat - 1
15959 for i in range(self.op.repeat):
15960 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15961 self._TestDelay()
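# Sketch (field names mirror what this LU reads from self.op): a two-second
# delay on the master, repeated three times, would be requested with
#
#   op = opcodes.OpTestDelay(duration=2.0, on_master=True, on_nodes=[],
#                            repeat=3)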
15964 class LURestrictedCommand(NoHooksLU):
15965 """Logical unit for executing restricted commands.
15970 def ExpandNames(self):
15972 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15974 self.needed_locks = {
15975 locking.LEVEL_NODE: self.op.nodes,
15977 self.share_locks = {
15978 locking.LEVEL_NODE: not self.op.use_locking,
15981 def CheckPrereq(self):
15982 """Check prerequisites.
15986 def Exec(self, feedback_fn):
15987 """Execute restricted command and return output.
15990 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15992 # Check if correct locks are held
15993 assert set(self.op.nodes).issubset(owned_nodes)
15995 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15997 result = []
15999 for node_name in self.op.nodes:
16000 nres = rpcres[node_name]
16001 if nres.fail_msg:
16002 msg = ("Command '%s' on node '%s' failed: %s" %
16003 (self.op.command, node_name, nres.fail_msg))
16004 result.append((False, msg))
16005 else:
16006 result.append((True, nres.payload))
16008 return result
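# The list built above pairs a success flag with either the command output or
# an error message, e.g. (sketch):
#
#   [(True, "service restarted\n"),
#    (False, "Command 'foo' on node 'node2.example.com' failed: timed out")]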
16011 class LUTestJqueue(NoHooksLU):
16012 """Utility LU to test some aspects of the job queue.
16017 # Must be lower than default timeout for WaitForJobChange to see whether it
16018 # notices changed jobs
16019 _CLIENT_CONNECT_TIMEOUT = 20.0
16020 _CLIENT_CONFIRM_TIMEOUT = 60.0
16023 def _NotifyUsingSocket(cls, cb, errcls):
16024 """Opens a Unix socket and waits for another program to connect.
16027 @param cb: Callback to send socket name to client
16028 @type errcls: class
16029 @param errcls: Exception class to use for errors
16032 # Using a temporary directory as there's no easy way to create temporary
16033 # sockets without writing a custom loop around tempfile.mktemp and
16035 tmpdir = tempfile.mkdtemp()
16037 tmpsock = utils.PathJoin(tmpdir, "sock")
16039 logging.debug("Creating temporary socket at %s", tmpsock)
16040 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
16045 # Send details to client
16048 # Wait for client to connect before continuing
16049 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
16050 try:
16051 (conn, _) = sock.accept()
16052 except socket.error, err:
16053 raise errcls("Client didn't connect in time (%s)" % err)
16057 # Remove as soon as client is connected
16058 shutil.rmtree(tmpdir)
16060 # Wait for client to close
16063 # pylint: disable=E1101
16064 # Instance of '_socketobject' has no ... member
16065 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
16067 except socket.error, err:
16068 raise errcls("Client failed to confirm notification (%s)" % err)
16072 def _SendNotification(self, test, arg, sockname):
16073 """Sends a notification to the client.
16076 @param test: Test name
16077 @param arg: Test argument (depends on test)
16078 @type sockname: string
16079 @param sockname: Socket path
16082 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16084 def _Notify(self, prereq, test, arg):
16085 """Notifies the client of a test.
16088 @param prereq: Whether this is a prereq-phase test
16090 @param test: Test name
16091 @param arg: Test argument (depends on test)
16095 errcls = errors.OpPrereqError
16097 errcls = errors.OpExecError
16099 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16103 def CheckArguments(self):
16104 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16105 self.expandnames_calls = 0
16107 def ExpandNames(self):
16108 checkargs_calls = getattr(self, "checkargs_calls", 0)
16109 if checkargs_calls < 1:
16110 raise errors.ProgrammerError("CheckArguments was not called")
16112 self.expandnames_calls += 1
16114 if self.op.notify_waitlock:
16115 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16117 self.LogInfo("Expanding names")
16119 # Get lock on master node (just to get a lock, not for a particular reason)
16120 self.needed_locks = {
16121 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16124 def Exec(self, feedback_fn):
16125 if self.expandnames_calls < 1:
16126 raise errors.ProgrammerError("ExpandNames was not called")
16128 if self.op.notify_exec:
16129 self._Notify(False, constants.JQT_EXEC, None)
16131 self.LogInfo("Executing")
16133 if self.op.log_messages:
16134 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16135 for idx, msg in enumerate(self.op.log_messages):
16136 self.LogInfo("Sending log message %s", idx + 1)
16137 feedback_fn(constants.JQT_MSGPREFIX + msg)
16138 # Report how many test messages have been sent
16139 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16142 raise errors.OpExecError("Opcode failure was requested")
16147 class LUTestAllocator(NoHooksLU):
16148 """Run allocator tests.
16150 This LU runs the allocator tests
16153 def CheckPrereq(self):
16154 """Check prerequisites.
16156 This checks the opcode parameters depending on the director and mode test.
16159 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16160 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16161 for attr in ["memory", "disks", "disk_template",
16162 "os", "tags", "nics", "vcpus"]:
16163 if not hasattr(self.op, attr):
16164 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16165 attr, errors.ECODE_INVAL)
16166 iname = self.cfg.ExpandInstanceName(self.op.name)
16167 if iname is not None:
16168 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16169 iname, errors.ECODE_EXISTS)
16170 if not isinstance(self.op.nics, list):
16171 raise errors.OpPrereqError("Invalid parameter 'nics'",
16172 errors.ECODE_INVAL)
16173 if not isinstance(self.op.disks, list):
16174 raise errors.OpPrereqError("Invalid parameter 'disks'",
16175 errors.ECODE_INVAL)
16176 for row in self.op.disks:
16177 if (not isinstance(row, dict) or
16178 constants.IDISK_SIZE not in row or
16179 not isinstance(row[constants.IDISK_SIZE], int) or
16180 constants.IDISK_MODE not in row or
16181 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16182 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16183 " parameter", errors.ECODE_INVAL)
16184 if self.op.hypervisor is None:
16185 self.op.hypervisor = self.cfg.GetHypervisorType()
16186 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16187 fname = _ExpandInstanceName(self.cfg, self.op.name)
16188 self.op.name = fname
16189 self.relocate_from = \
16190 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16191 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16192 constants.IALLOCATOR_MODE_NODE_EVAC):
16193 if not self.op.instances:
16194 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16195 self.op.instances = _GetWantedInstances(self, self.op.instances)
16197 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16198 self.op.mode, errors.ECODE_INVAL)
16200 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16201 if self.op.iallocator is None:
16202 raise errors.OpPrereqError("Missing allocator name",
16203 errors.ECODE_INVAL)
16204 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16205 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16206 self.op.direction, errors.ECODE_INVAL)
16208 def Exec(self, feedback_fn):
16209 """Run the allocator test.
16212 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16213 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16214 memory=self.op.memory,
16215 disks=self.op.disks,
16216 disk_template=self.op.disk_template,
16220 vcpus=self.op.vcpus,
16221 spindle_use=self.op.spindle_use,
16222 hypervisor=self.op.hypervisor,
16223 node_whitelist=None)
16224 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16225 req = iallocator.IAReqRelocate(name=self.op.name,
16226 relocate_from=list(self.relocate_from))
16227 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16228 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16229 target_groups=self.op.target_groups)
16230 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16231 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16232 evac_mode=self.op.evac_mode)
16233 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16234 disk_template = self.op.disk_template
16235 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16236 memory=self.op.memory,
16237 disks=self.op.disks,
16238 disk_template=disk_template,
16242 vcpus=self.op.vcpus,
16243 spindle_use=self.op.spindle_use,
16244 hypervisor=self.op.hypervisor)
16245 for idx in range(self.op.count)]
16246 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16248 raise errors.ProgrammerError("Uncatched mode %s in"
16249 " LUTestAllocator.Exec", self.op.mode)
16251 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16252 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16253 result = ial.in_text
16254 else:
16255 ial.Run(self.op.iallocator, validate=False)
16256 result = ial.out_text
16258 return result
16260 class LUNetworkAdd(LogicalUnit):
16261 """Logical unit for creating networks.
16264 HPATH = "network-add"
16265 HTYPE = constants.HTYPE_NETWORK
16268 def BuildHooksNodes(self):
16269 """Build hooks nodes.
16272 mn = self.cfg.GetMasterNode()
16273 return ([mn], [mn])
16275 def CheckArguments(self):
16276 if self.op.mac_prefix:
16277 self.op.mac_prefix = \
16278 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16280 def ExpandNames(self):
16281 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16283 if self.op.conflicts_check:
16284 self.share_locks[locking.LEVEL_NODE] = 1
16285 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16286 self.needed_locks = {
16287 locking.LEVEL_NODE: locking.ALL_SET,
16288 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16290 else:
16291 self.needed_locks = {}
16293 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16295 def CheckPrereq(self):
16296 if self.op.network is None:
16297 raise errors.OpPrereqError("Network must be given",
16298 errors.ECODE_INVAL)
16300 try:
16301 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16302 except errors.OpPrereqError:
16303 pass
16304 else:
16305 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16306 " network (UUID: %s)" %
16307 (self.op.network_name, existing_uuid),
16308 errors.ECODE_EXISTS)
16310 # Check tag validity
16311 for tag in self.op.tags:
16312 objects.TaggableObject.ValidateTag(tag)
16314 def BuildHooksEnv(self):
16315 """Build hooks env.
16319 "name": self.op.network_name,
16320 "subnet": self.op.network,
16321 "gateway": self.op.gateway,
16322 "network6": self.op.network6,
16323 "gateway6": self.op.gateway6,
16324 "mac_prefix": self.op.mac_prefix,
16325 "tags": self.op.tags,
16326 }
16327 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16329 def Exec(self, feedback_fn):
16330 """Add the ip pool to the cluster.
16333 nobj = objects.Network(name=self.op.network_name,
16334 network=self.op.network,
16335 gateway=self.op.gateway,
16336 network6=self.op.network6,
16337 gateway6=self.op.gateway6,
16338 mac_prefix=self.op.mac_prefix,
16339 uuid=self.network_uuid)
16340 # Initialize the associated address pool
16341 try:
16342 pool = network.AddressPool.InitializeNetwork(nobj)
16343 except errors.AddressPoolError, err:
16344 raise errors.OpExecError("Cannot create IP address pool for network"
16345 " '%s': %s" % (self.op.network_name, err))
16347 # Check if we need to reserve the nodes and the cluster master IP
16348 # These may not be allocated to any instances in routed mode, as
16349 # they wouldn't function anyway.
16350 if self.op.conflicts_check:
16351 for node in self.cfg.GetAllNodesInfo().values():
16352 for ip in [node.primary_ip, node.secondary_ip]:
16353 try:
16354 if pool.Contains(ip):
16355 pool.Reserve(ip)
16356 self.LogInfo("Reserved IP address of node '%s' (%s)",
16357 node.name, ip)
16358 except errors.AddressPoolError, err:
16359 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16360 ip, node.name, err)
16362 master_ip = self.cfg.GetClusterInfo().master_ip
16363 try:
16364 if pool.Contains(master_ip):
16365 pool.Reserve(master_ip)
16366 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16367 except errors.AddressPoolError, err:
16368 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16369 master_ip, err)
16371 if self.op.add_reserved_ips:
16372 for ip in self.op.add_reserved_ips:
16373 try:
16374 pool.Reserve(ip, external=True)
16375 except errors.AddressPoolError, err:
16376 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16377 (ip, err))
16379 if self.op.tags:
16380 for tag in self.op.tags:
16381 nobj.AddTag(tag)
16383 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16384 del self.remove_locks[locking.LEVEL_NETWORK]
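# For reference (editorial addition, not part of the original module): this LU
# implements the OpNetworkAdd opcode; a typical invocation from the command line
# would look roughly like
#   gnt-network add --network=192.0.2.0/24 --gateway=192.0.2.1 example-net
# where the network name and addresses are made up for the example.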
16387 class LUNetworkRemove(LogicalUnit):
16388 HPATH = "network-remove"
16389 HTYPE = constants.HTYPE_NETWORK
16390 REQ_BGL = False
16392 def ExpandNames(self):
16393 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16395 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16396 self.needed_locks = {
16397 locking.LEVEL_NETWORK: [self.network_uuid],
16398 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16399 }
16401 def CheckPrereq(self):
16402 """Check prerequisites.
16404 This checks that the given network exists as a network and is not
16405 connected to any node group.
16408 """
16409 # Verify that the network is not connected.
16410 node_groups = [group.name
16411 for group in self.cfg.GetAllNodeGroupsInfo().values()
16412 if self.network_uuid in group.networks]
16414 if node_groups:
16415 self.LogWarning("Network '%s' is connected to the following"
16416 " node groups: %s" %
16417 (self.op.network_name,
16418 utils.CommaJoin(utils.NiceSort(node_groups))))
16419 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16421 def BuildHooksEnv(self):
16422 """Build hooks env.
16426 "NETWORK_NAME": self.op.network_name,
16429 def BuildHooksNodes(self):
16430 """Build hooks nodes.
16432 """
16433 mn = self.cfg.GetMasterNode()
16434 return ([mn], [mn])
16436 def Exec(self, feedback_fn):
16437 """Remove the network.
16439 """
16440 try:
16441 self.cfg.RemoveNetwork(self.network_uuid)
16442 except errors.ConfigurationError:
16443 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16444 (self.op.network_name, self.network_uuid))
16447 class LUNetworkSetParams(LogicalUnit):
16448 """Modifies the parameters of a network.
16450 """
16451 HPATH = "network-modify"
16452 HTYPE = constants.HTYPE_NETWORK
16453 REQ_BGL = False
16455 def CheckArguments(self):
16456 if (self.op.gateway and
16457 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16458 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16459 " at once", errors.ECODE_INVAL)
16461 def ExpandNames(self):
16462 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16464 self.needed_locks = {
16465 locking.LEVEL_NETWORK: [self.network_uuid],
16466 }
16468 def CheckPrereq(self):
16469 """Check prerequisites.
16471 """
16472 self.network = self.cfg.GetNetwork(self.network_uuid)
16473 self.gateway = self.network.gateway
16474 self.mac_prefix = self.network.mac_prefix
16475 self.network6 = self.network.network6
16476 self.gateway6 = self.network.gateway6
16477 self.tags = self.network.tags
16479 self.pool = network.AddressPool(self.network)
16481 if self.op.gateway:
16482 if self.op.gateway == constants.VALUE_NONE:
16483 self.gateway = None
16484 else:
16485 self.gateway = self.op.gateway
16486 if self.pool.IsReserved(self.gateway):
16487 raise errors.OpPrereqError("Gateway IP address '%s' is already"
16488 " reserved" % self.gateway,
16489 errors.ECODE_STATE)
16491 if self.op.mac_prefix:
16492 if self.op.mac_prefix == constants.VALUE_NONE:
16493 self.mac_prefix = None
16494 else:
16495 self.mac_prefix = \
16496 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16498 if self.op.gateway6:
16499 if self.op.gateway6 == constants.VALUE_NONE:
16500 self.gateway6 = None
16501 else:
16502 self.gateway6 = self.op.gateway6
16504 if self.op.network6:
16505 if self.op.network6 == constants.VALUE_NONE:
16506 self.network6 = None
16507 else:
16508 self.network6 = self.op.network6
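# Note (editorial addition): constants.VALUE_NONE acts as an explicit "clear this
# field" marker in the checks above, so passing it for gateway, mac_prefix,
# gateway6 or network6 resets the respective attribute to None instead of
# keeping the current value.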
16510 def BuildHooksEnv(self):
16511 """Build hooks env.
16515 "name": self.op.network_name,
16516 "subnet": self.network.network,
16517 "gateway": self.gateway,
16518 "network6": self.network6,
16519 "gateway6": self.gateway6,
16520 "mac_prefix": self.mac_prefix,
16521 "tags": self.tags,
16522 }
16523 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16525 def BuildHooksNodes(self):
16526 """Build hooks nodes.
16528 """
16529 mn = self.cfg.GetMasterNode()
16530 return ([mn], [mn])
16532 def Exec(self, feedback_fn):
16533 """Modifies the network.
16535 """
16536 # TODO: reserve/release via temporary reservation manager
16537 # extend cfg.ReserveIp/ReleaseIp with the external flag
16538 if self.op.gateway:
16539 if self.gateway == self.network.gateway:
16540 self.LogWarning("Gateway is already %s", self.gateway)
16541 else:
16542 if self.gateway:
16543 self.pool.Reserve(self.gateway, external=True)
16544 if self.network.gateway:
16545 self.pool.Release(self.network.gateway, external=True)
16546 self.network.gateway = self.gateway
16548 if self.op.add_reserved_ips:
16549 for ip in self.op.add_reserved_ips:
16550 try:
16551 if self.pool.IsReserved(ip):
16552 self.LogWarning("IP address %s is already reserved", ip)
16553 else:
16554 self.pool.Reserve(ip, external=True)
16555 except errors.AddressPoolError, err:
16556 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16558 if self.op.remove_reserved_ips:
16559 for ip in self.op.remove_reserved_ips:
16560 if ip == self.network.gateway:
16561 self.LogWarning("Cannot unreserve Gateway's IP")
16562 continue
16563 try:
16564 if not self.pool.IsReserved(ip):
16565 self.LogWarning("IP address %s is already unreserved", ip)
16566 else:
16567 self.pool.Release(ip, external=True)
16568 except errors.AddressPoolError, err:
16569 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16571 if self.op.mac_prefix:
16572 self.network.mac_prefix = self.mac_prefix
16574 if self.op.network6:
16575 self.network.network6 = self.network6
16577 if self.op.gateway6:
16578 self.network.gateway6 = self.gateway6
16580 self.pool.Validate()
16582 self.cfg.Update(self.network, feedback_fn)
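# Illustrative example (editorial addition, not from the original source):
# reserving or releasing addresses through this LU corresponds roughly to
#   gnt-network modify --add-reserved-ips=192.0.2.10 example-net
# with a made-up network name and address.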
16585 class _NetworkQuery(_QueryBase):
16586 FIELDS = query.NETWORK_FIELDS
16588 def ExpandNames(self, lu):
16589 lu.needed_locks = {}
16590 lu.share_locks = _ShareAll()
16592 self.do_locking = self.use_locking
16594 all_networks = lu.cfg.GetAllNetworksInfo()
16595 name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16597 if self.names:
16598 missing = []
16599 self.wanted = []
16601 for name in self.names:
16602 if name in name_to_uuid:
16603 self.wanted.append(name_to_uuid[name])
16604 else:
16605 missing.append(name)
16607 if missing:
16608 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16609 errors.ECODE_NOENT)
16610 else:
16611 self.wanted = locking.ALL_SET
16613 if self.do_locking:
16614 lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16615 if query.NETQ_INST in self.requested_data:
16616 lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16617 if query.NETQ_GROUP in self.requested_data:
16618 lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
16620 def DeclareLocks(self, lu, level):
16621 pass
16623 def _GetQueryData(self, lu):
16624 """Computes the list of networks and their attributes.
16626 """
16627 all_networks = lu.cfg.GetAllNetworksInfo()
16629 network_uuids = self._GetNames(lu, all_networks.keys(),
16630 locking.LEVEL_NETWORK)
16632 do_instances = query.NETQ_INST in self.requested_data
16633 do_groups = query.NETQ_GROUP in self.requested_data
16635 network_to_instances = None
16636 network_to_groups = None
16638 # For NETQ_GROUP, we need to map network->[groups]
16639 if do_groups:
16640 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16641 network_to_groups = dict((uuid, []) for uuid in network_uuids)
16642 for _, group in all_groups.iteritems():
16643 for net_uuid in network_uuids:
16644 netparams = group.networks.get(net_uuid, None)
16645 if netparams:
16646 info = (group.name, netparams[constants.NIC_MODE],
16647 netparams[constants.NIC_LINK])
16649 network_to_groups[net_uuid].append(info)
16651 if do_instances:
16652 all_instances = lu.cfg.GetAllInstancesInfo()
16653 network_to_instances = dict((uuid, []) for uuid in network_uuids)
16654 for instance in all_instances.values():
16655 for nic in instance.nics:
16656 if nic.network in network_uuids:
16657 network_to_instances[nic.network].append(instance.name)
16660 if query.NETQ_STATS in self.requested_data:
16661 stats = \
16662 dict((uuid,
16663 self._GetStats(network.AddressPool(all_networks[uuid])))
16664 for uuid in network_uuids)
16665 else:
16666 stats = None
16668 return query.NetworkQueryData([all_networks[uuid]
16669 for uuid in network_uuids],
16670 network_to_groups,
16671 network_to_instances,
16672 stats)
16674 @staticmethod
16675 def _GetStats(pool):
16676 """Returns statistics for a network address pool.
16680 "free_count": pool.GetFreeCount(),
16681 "reserved_count": pool.GetReservedCount(),
16682 "map": pool.GetMap(),
16683 "external_reservations":
16684 utils.CommaJoin(pool.GetExternalReservations()),
16685 }
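# A sketch of the mapping returned by _GetStats (editorial addition; values are
# invented for illustration):
#   {
#     "free_count": 250,
#     "reserved_count": 4,
#     "map": "XX..........................X",
#     "external_reservations": "192.0.2.1, 192.0.2.255",
#   }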
16688 class LUNetworkQuery(NoHooksLU):
16689 """Logical unit for querying networks.
16691 """
16692 REQ_BGL = False
16694 def CheckArguments(self):
16695 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16696 self.op.output_fields, self.op.use_locking)
16698 def ExpandNames(self):
16699 self.nq.ExpandNames(self)
16701 def Exec(self, feedback_fn):
16702 return self.nq.OldStyleQuery(self)
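# Illustrative use (editorial addition): the query machinery above backs
# "gnt-network list"; requested output fields are validated against
# query.NETWORK_FIELDS, e.g. a field list such as "name,gateway,free_count"
# (field names assumed for the example).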
16705 class LUNetworkConnect(LogicalUnit):
16706 """Connect a network to a nodegroup
16709 HPATH = "network-connect"
16710 HTYPE = constants.HTYPE_NETWORK
16713 def ExpandNames(self):
16714 self.network_name = self.op.network_name
16715 self.group_name = self.op.group_name
16716 self.network_mode = self.op.network_mode
16717 self.network_link = self.op.network_link
16719 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16720 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16722 self.needed_locks = {
16723 locking.LEVEL_INSTANCE: [],
16724 locking.LEVEL_NODEGROUP: [self.group_uuid],
16725 }
16726 self.share_locks[locking.LEVEL_INSTANCE] = 1
16728 if self.op.conflicts_check:
16729 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16730 self.share_locks[locking.LEVEL_NETWORK] = 1
16732 def DeclareLocks(self, level):
16733 if level == locking.LEVEL_INSTANCE:
16734 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16736 # Lock instances optimistically, needs verification once group lock has
16737 # been acquired
16738 if self.op.conflicts_check:
16739 self.needed_locks[locking.LEVEL_INSTANCE] = \
16740 self.cfg.GetNodeGroupInstances(self.group_uuid)
16742 def BuildHooksEnv(self):
16744 "GROUP_NAME": self.group_name,
16745 "GROUP_NETWORK_MODE": self.network_mode,
16746 "GROUP_NETWORK_LINK": self.network_link,
16747 }
16748 return ret
16750 def BuildHooksNodes(self):
16751 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16752 return (nodes, nodes)
16754 def CheckPrereq(self):
16755 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16757 assert self.group_uuid in owned_groups
16759 # Check if locked instances are still correct
16760 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16761 if self.op.conflicts_check:
16762 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16764 self.netparams = {
16765 constants.NIC_MODE: self.network_mode,
16766 constants.NIC_LINK: self.network_link,
16767 }
16768 objects.NIC.CheckParameterSyntax(self.netparams)
16770 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16771 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16772 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16773 self.connected = False
16774 if self.network_uuid in self.group.networks:
16775 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16776 (self.network_name, self.group.name))
16777 self.connected = True
16779 # check only if not already connected
16780 elif self.op.conflicts_check:
16781 pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16783 _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16784 "connect to", owned_instances)
16786 def Exec(self, feedback_fn):
16787 # Connect the network and update the group only if not already connected
16788 if not self.connected:
16789 self.group.networks[self.network_uuid] = self.netparams
16790 self.cfg.Update(self.group, feedback_fn)
16793 def _NetworkConflictCheck(lu, check_fn, action, instances):
16794 """Checks for network interface conflicts with a network.
16796 @type lu: L{LogicalUnit}
16797 @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16798 returning a boolean
16799 @param check_fn: Function checking for conflict
16800 @type action: string
16801 @param action: Part of error message (see code)
16802 @raise errors.OpPrereqError: If conflicting IP addresses are found.
16804 """
16805 conflicts = []
16807 for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
16808 instconflicts = [(idx, nic.ip)
16809 for (idx, nic) in enumerate(instance.nics)
16810 if check_fn(nic)]
16812 if instconflicts:
16813 conflicts.append((instance.name, instconflicts))
16815 if conflicts:
16816 lu.LogWarning("IP addresses from network '%s', which is about to %s"
16817 " node group '%s', are in use: %s" %
16818 (lu.network_name, action, lu.group.name,
16819 utils.CommaJoin(("%s: %s" %
16820 (name, _FmtNetworkConflict(details)))
16821 for (name, details) in conflicts)))
16823 raise errors.OpPrereqError("Conflicting IP addresses found;"
16824 " remove/modify the corresponding network"
16825 " interfaces", errors.ECODE_STATE)
16828 def _FmtNetworkConflict(details):
16829 """Utility for L{_NetworkConflictCheck}.
16831 """
16832 return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16833 for (idx, ipaddr) in details)
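# Example (illustrative, editorial addition): for details
# [(0, "192.0.2.10"), (2, "192.0.2.12")] this helper returns
# "nic0/192.0.2.10, nic2/192.0.2.12".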
16836 class LUNetworkDisconnect(LogicalUnit):
16837 """Disconnect a network to a nodegroup
16840 HPATH = "network-disconnect"
16841 HTYPE = constants.HTYPE_NETWORK
16844 def ExpandNames(self):
16845 self.network_name = self.op.network_name
16846 self.group_name = self.op.group_name
16848 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16849 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16851 self.needed_locks = {
16852 locking.LEVEL_INSTANCE: [],
16853 locking.LEVEL_NODEGROUP: [self.group_uuid],
16854 }
16855 self.share_locks[locking.LEVEL_INSTANCE] = 1
16857 def DeclareLocks(self, level):
16858 if level == locking.LEVEL_INSTANCE:
16859 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16861 # Lock instances optimistically, needs verification once group lock has
16862 # been acquired
16863 self.needed_locks[locking.LEVEL_INSTANCE] = \
16864 self.cfg.GetNodeGroupInstances(self.group_uuid)
16866 def BuildHooksEnv(self):
16868 "GROUP_NAME": self.group_name,
16872 def BuildHooksNodes(self):
16873 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16874 return (nodes, nodes)
16876 def CheckPrereq(self):
16877 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16879 assert self.group_uuid in owned_groups
16881 # Check if locked instances are still correct
16882 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16883 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16885 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16886 self.connected = True
16887 if self.network_uuid not in self.group.networks:
16888 self.LogWarning("Network '%s' is not mapped to group '%s'",
16889 self.network_name, self.group.name)
16890 self.connected = False
16892 # We need this check only if network is not already connected
16893 else:
16894 _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
16895 "disconnect from", owned_instances)
16897 def Exec(self, feedback_fn):
16898 # Disconnect the network and update the group only if network is connected
16899 if self.connected:
16900 del self.group.networks[self.network_uuid]
16901 self.cfg.Update(self.group, feedback_fn)
16904 #: Query type implementations
16905 _QUERY_IMPL = {
16906 constants.QR_CLUSTER: _ClusterQuery,
16907 constants.QR_INSTANCE: _InstanceQuery,
16908 constants.QR_NODE: _NodeQuery,
16909 constants.QR_GROUP: _GroupQuery,
16910 constants.QR_NETWORK: _NetworkQuery,
16911 constants.QR_OS: _OsQuery,
16912 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16913 constants.QR_EXPORT: _ExportQuery,
16914 }
16916 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16919 def _GetQueryImplementation(name):
16920 """Returns the implemtnation for a query type.
16922 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16926 return _QUERY_IMPL[name]
16927 except KeyError:
16928 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16929 errors.ECODE_INVAL)
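# Example (illustrative, editorial addition): _GetQueryImplementation(
# constants.QR_NETWORK) returns the _NetworkQuery class registered in
# _QUERY_IMPL above, while an unknown resource name raises OpPrereqError.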
16932 def _CheckForConflictingIp(lu, ip, node):
16933 """In case of conflicting IP address raise error.
16935 @type ip: string
16936 @param ip: IP address
16937 @type node: string
16938 @param node: node name
16940 """
16941 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16942 if conf_net is not None:
16943 raise errors.OpPrereqError(("The requested IP address (%s) belongs to"
16944 " network %s, but the target NIC does not." %
16945 (ip, conf_net)),
16946 errors.ECODE_STATE)
16948 return (None, None)