# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
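
# A minimal usage sketch (hypothetical LU and helper, for illustration
# only): an Exec implementation can hand follow-up work to the job queue
# by returning a ResultWithJobs; mcpu._ProcessResult then submits the jobs:
#
#   def Exec(self, feedback_fn):
#     ops = [opcodes.OpInstanceStartup(instance_name=name)
#            for name in self._StoppedInstances()]
#     return ResultWithJobs([[op] for op in ops])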


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.add_locks = {}
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
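
  # A usage sketch (hypothetical LU, names illustrative): an instance LU
  # typically requests node locks lazily in ExpandNames and resolves them
  # here once the instance lock is held:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()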


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
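
# Typical use (sketch): read-only LUs can declare every lock level shared
# in their ExpandNames, e.g.:
#
#   self.share_locks = _ShareAll()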


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @rtype: bool
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
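
# A brief illustration with hypothetical values: with use_default=True, a
# value of constants.VALUE_DEFAULT removes the key so the default applies
# again, while any other value overrides the old one:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "root_path": "/dev/vda1"}
#   _GetUpdatedParams(old, upd)
#   => {"root_path": "/dev/vda1"}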


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
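
# A usage sketch (hypothetical lock names): once an LU has narrowed down
# the nodes it actually works on, it can drop the remaining node locks:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# or release an explicit set instead:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unused_nodes)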


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
    value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
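
# Result shape, with made-up node and LV names for illustration:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}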


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
  """Make sure that none of the given parameters is global.

  If a global parameter is found, an L{errors.OpPrereqError} exception is
  raised. This is used to avoid setting global parameters for individual nodes.

  @type params: dictionary
  @param params: Parameters to check
  @type glob_pars: dictionary
  @param glob_pars: Forbidden parameters
  @type kind: string
  @param kind: Kind of parameters (e.g. "node")
  @type bad_levels: string
  @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
    "instance")
  @type good_levels: strings
  @param good_levels: Level(s) at which the parameters are allowed (e.g.
    "cluster or group")

  """
  used_globals = glob_pars.intersection(params)
  if used_globals:
    msg = ("The following %s parameters are global and cannot"
           " be customized at %s level, please modify them at"
           " %s level: %s" %
           (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # use on the PV)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
    not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
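
# A brief illustration with hypothetical policy values: for an ipolicy
# where ISPECS_MIN["memory-size"] is 128 and ISPECS_MAX["memory-size"] is
# 32768,
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 64)
#
# returns "memory-size value 64 is not in range [128, 32768]", while a
# value of 256 would yield None.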


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
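
# For illustration (hypothetical numbers): an instance with 4096 MB of
# memory, 2 VCPUs, one 10240 MB disk, one NIC and spindle_use=1 would be
# checked via
#
#   _ComputeIPolicySpecViolation(ipolicy, 4096, 2, 1, 1, [10240], 1)
#
# and the result is an empty list when every value fits the policy.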


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
    did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
      elif net:
        # FIXME: broken network reference: the instance NIC specifies a
        # network, but the relevant network entry was not in the config. This
        # should be made impossible.
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if tags is None:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  ip = nic.ip
  mac = nic.mac
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net = nic.network
  netinfo = None
  if net:
    net_uuid = lu.cfg.LookupNetwork(net)
    if net_uuid:
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
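
# Illustration (hypothetical names): a request for "node1" resolving to
# "node1.example.com" passes the prefix check, while "node1" resolving to
# "node2.example.com" raises OpPrereqError.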


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1942 def _VerifyCertificate(filename):
1943 """Verifies a certificate for L{LUClusterVerifyConfig}.
1945 @type filename: string
1946 @param filename: Path to PEM file
1950 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1951 utils.ReadFile(filename))
1952 except Exception, err: # pylint: disable=W0703
1953 return (LUClusterVerifyConfig.ETYPE_ERROR,
1954 "Failed to load X509 certificate %s: %s" % (filename, err))
1957 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1958 constants.SSL_CERT_EXPIRATION_ERROR)
1961 fnamemsg = "While verifying %s: %s" % (filename, msg)
1966 return (None, fnamemsg)
1967 elif errcode == utils.CERT_WARNING:
1968 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1969 elif errcode == utils.CERT_ERROR:
1970 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1972 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
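# Illustrative note (sketch only): the (errcode, msg) pairs returned above
# plug straight into _VerifyErrors-style reporting, e.g.
#   (errcode, msg) = _VerifyCertificate(pathutils.NODED_CERT_FILE)
# where errcode is None, LUClusterVerifyConfig.ETYPE_WARNING or
# LUClusterVerifyConfig.ETYPE_ERROR, and msg names the file it refers to.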
1975 def _GetAllHypervisorParameters(cluster, instances):
1976 """Compute the set of all hypervisor parameters.
1978 @type cluster: L{objects.Cluster}
1979 @param cluster: the cluster object
1980 @type instances: list of L{objects.Instance}
1981 @param instances: additional instances from which to obtain parameters
1982 @rtype: list of (origin, hypervisor, parameters)
1983 @return: a list with all parameters found, indicating the hypervisor they
1984 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1989 for hv_name in cluster.enabled_hypervisors:
1990 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1992 for os_name, os_hvp in cluster.os_hvp.items():
1993 for hv_name, hv_params in os_hvp.items():
1995 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1996 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1998 # TODO: collapse identical parameter values in a single one
1999 for instance in instances:
2000 if instance.hvparams:
2001 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2002 cluster.FillHV(instance)))
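# Illustrative sketch of the accumulated list (hypothetical names): the
# function returns hvp_data looking like
#   [("cluster", "xen-pvm", <cluster defaults>),
#    ("os debian-image", "xen-pvm", <defaults plus OS overrides>),
#    ("instance inst1.example.com", "kvm", <fully filled parameters>)]
# which _VerifyHVP below walks to syntax-check every parameter source.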
2007 class _VerifyErrors(object):
2008 """Mix-in for cluster/group verify LUs.
2010 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2011 self.op and self._feedback_fn to be available.)
2015 ETYPE_FIELD = "code"
2016 ETYPE_ERROR = "ERROR"
2017 ETYPE_WARNING = "WARNING"
2019 def _Error(self, ecode, item, msg, *args, **kwargs):
2020 """Format an error message.
2022 Based on the opcode's error_codes parameter, either format a
2023 parseable error code, or a simpler error string.
2025 This must be called only from Exec and functions called from Exec.
2028 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2029 itype, etxt, _ = ecode
2030 # If the error code is in the list of ignored errors, demote the error to a warning
2032 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2033 ltype = self.ETYPE_WARNING
2034 # first complete the msg
2037 # then format the whole message
2038 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2039 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2045 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2046 # and finally report it via the feedback_fn
2047 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2048 # warnings alone do not mark the operation as failed
2049 if ltype == self.ETYPE_ERROR:
2052 def _ErrorIf(self, cond, *args, **kwargs):
2053 """Log an error message if the passed condition is True.
2057 or self.op.debug_simulate_errors): # pylint: disable=E1101
2058 self._Error(*args, **kwargs)
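# A minimal illustrative sketch (hypothetical helper, not part of the LU
# code): the machine-parseable line _VerifyErrors._Error emits when the
# opcode sets error_codes.
def _ExampleVerifyErrorLine(ltype, etxt, itype, item, msg):
  """Sketch only: mirrors the "%s:%s:%s:%s:%s" formatting above.

  E.g. _ExampleVerifyErrorLine("ERROR", "ECLUSTERCFG", "cluster", "", "bad")
  returns "ERROR:ECLUSTERCFG:cluster::bad".

  """
  return "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)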
2061 class LUClusterVerify(NoHooksLU):
2062 """Submits all jobs necessary to verify the cluster.
2067 def ExpandNames(self):
2068 self.needed_locks = {}
2070 def Exec(self, feedback_fn):
2073 if self.op.group_name:
2074 groups = [self.op.group_name]
2075 depends_fn = lambda: None
2077 groups = self.cfg.GetNodeGroupList()
2079 # Verify global configuration
2081 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2084 # Always depend on global verification
2085 depends_fn = lambda: [(-len(jobs), [])]
2088 [opcodes.OpClusterVerifyGroup(group_name=group,
2089 ignore_errors=self.op.ignore_errors,
2090 depends=depends_fn())]
2091 for group in groups)
2093 # Fix up all parameters
2094 for op in itertools.chain(*jobs): # pylint: disable=W0142
2095 op.debug_simulate_errors = self.op.debug_simulate_errors
2096 op.verbose = self.op.verbose
2097 op.error_codes = self.op.error_codes
2099 op.skip_checks = self.op.skip_checks
2100 except AttributeError:
2101 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2103 return ResultWithJobs(jobs)
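# Illustrative note on the dependency values built above: job dependencies
# are expressed with relative job IDs, so with a single config-verify job
# submitted first, each group-verify job carries depends=[(-1, [])], i.e.
# "wait for the job submitted one position earlier in this batch".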
2106 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2107 """Verifies the cluster config.
2112 def _VerifyHVP(self, hvp_data):
2113 """Verifies locally the syntax of the hypervisor parameters.
2116 for item, hv_name, hv_params in hvp_data:
2117 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2120 hv_class = hypervisor.GetHypervisorClass(hv_name)
2121 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2122 hv_class.CheckParameterSyntax(hv_params)
2123 except errors.GenericError, err:
2124 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2126 def ExpandNames(self):
2127 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2128 self.share_locks = _ShareAll()
2130 def CheckPrereq(self):
2131 """Check prerequisites.
2134 # Retrieve all information
2135 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2136 self.all_node_info = self.cfg.GetAllNodesInfo()
2137 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2139 def Exec(self, feedback_fn):
2140 """Verify integrity of cluster, performing various test on nodes.
2144 self._feedback_fn = feedback_fn
2146 feedback_fn("* Verifying cluster config")
2148 for msg in self.cfg.VerifyConfig():
2149 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2151 feedback_fn("* Verifying cluster certificate files")
2153 for cert_filename in pathutils.ALL_CERT_FILES:
2154 (errcode, msg) = _VerifyCertificate(cert_filename)
2155 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2157 feedback_fn("* Verifying hypervisor parameters")
2159 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2160 self.all_inst_info.values()))
2162 feedback_fn("* Verifying all nodes belong to an existing group")
2164 # We do this verification here because, should this bogus circumstance
2165 # occur, it would never be caught by VerifyGroup, which only acts on
2166 # nodes/instances reachable from existing node groups.
2168 dangling_nodes = set(node.name for node in self.all_node_info.values()
2169 if node.group not in self.all_group_info)
2171 dangling_instances = {}
2172 no_node_instances = []
2174 for inst in self.all_inst_info.values():
2175 if inst.primary_node in dangling_nodes:
2176 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2177 elif inst.primary_node not in self.all_node_info:
2178 no_node_instances.append(inst.name)
2183 utils.CommaJoin(dangling_instances.get(node.name,
2185 for node in dangling_nodes]
2187 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2189 "the following nodes (and their instances) belong to a non"
2190 " existing group: %s", utils.CommaJoin(pretty_dangling))
2192 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2194 "the following instances have a non-existing primary-node:"
2195 " %s", utils.CommaJoin(no_node_instances))
2200 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2201 """Verifies the status of a node group.
2204 HPATH = "cluster-verify"
2205 HTYPE = constants.HTYPE_CLUSTER
2208 _HOOKS_INDENT_RE = re.compile("^", re.M)
2210 class NodeImage(object):
2211 """A class representing the logical and physical status of a node.
2214 @ivar name: the node name to which this object refers
2215 @ivar volumes: a structure as returned from
2216 L{ganeti.backend.GetVolumeList} (runtime)
2217 @ivar instances: a list of running instances (runtime)
2218 @ivar pinst: list of configured primary instances (config)
2219 @ivar sinst: list of configured secondary instances (config)
2220 @ivar sbp: dictionary of {primary-node: list of instances} for all
2221 instances for which this node is secondary (config)
2222 @ivar mfree: free memory, as reported by hypervisor (runtime)
2223 @ivar dfree: free disk, as reported by the node (runtime)
2224 @ivar offline: the offline status (config)
2225 @type rpc_fail: boolean
2226 @ivar rpc_fail: whether the RPC verify call failed (overall,
2227 not whether the individual keys were correct) (runtime)
2228 @type lvm_fail: boolean
2229 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2230 @type hyp_fail: boolean
2231 @ivar hyp_fail: whether the RPC call didn't return the instance list
2232 @type ghost: boolean
2233 @ivar ghost: whether this is a known node or not (config)
2234 @type os_fail: boolean
2235 @ivar os_fail: whether the RPC call didn't return valid OS data
2237 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2238 @type vm_capable: boolean
2239 @ivar vm_capable: whether the node can host instances
2241 @ivar pv_min: size in MiB of the smallest PVs
2243 @ivar pv_max: size in MiB of the biggest PVs
2246 def __init__(self, offline=False, name=None, vm_capable=True):
2255 self.offline = offline
2256 self.vm_capable = vm_capable
2257 self.rpc_fail = False
2258 self.lvm_fail = False
2259 self.hyp_fail = False
2261 self.os_fail = False
2266 def ExpandNames(self):
2267 # This raises errors.OpPrereqError on its own:
2268 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2270 # Get instances in node group; this is unsafe and needs verification later
2272 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2274 self.needed_locks = {
2275 locking.LEVEL_INSTANCE: inst_names,
2276 locking.LEVEL_NODEGROUP: [self.group_uuid],
2277 locking.LEVEL_NODE: [],
2279 # This opcode is run by the watcher every five minutes and acquires all nodes
2280 # for a group. It doesn't run for a long time, so it's better to acquire
2281 # the node allocation lock as well.
2282 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2285 self.share_locks = _ShareAll()
2287 def DeclareLocks(self, level):
2288 if level == locking.LEVEL_NODE:
2289 # Get members of node group; this is unsafe and needs verification later
2290 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2292 all_inst_info = self.cfg.GetAllInstancesInfo()
2294 # In Exec(), we warn about mirrored instances that have primary and
2295 # secondary living in separate node groups. To fully verify that
2296 # volumes for these instances are healthy, we will need to do an
2297 # extra call to their secondaries. We ensure here those nodes will be locked.
2299 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2300 # Important: access only the instances whose lock is owned
2301 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2302 nodes.update(all_inst_info[inst].secondary_nodes)
2304 self.needed_locks[locking.LEVEL_NODE] = nodes
2306 def CheckPrereq(self):
2307 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2308 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2310 group_nodes = set(self.group_info.members)
2312 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2315 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2317 unlocked_instances = \
2318 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2321 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2322 utils.CommaJoin(unlocked_nodes),
2325 if unlocked_instances:
2326 raise errors.OpPrereqError("Missing lock for instances: %s" %
2327 utils.CommaJoin(unlocked_instances),
2330 self.all_node_info = self.cfg.GetAllNodesInfo()
2331 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2333 self.my_node_names = utils.NiceSort(group_nodes)
2334 self.my_inst_names = utils.NiceSort(group_instances)
2336 self.my_node_info = dict((name, self.all_node_info[name])
2337 for name in self.my_node_names)
2339 self.my_inst_info = dict((name, self.all_inst_info[name])
2340 for name in self.my_inst_names)
2342 # We detect here the nodes that will need the extra RPC calls for verifying
2343 # split LV volumes; they should be locked.
2344 extra_lv_nodes = set()
2346 for inst in self.my_inst_info.values():
2347 if inst.disk_template in constants.DTS_INT_MIRROR:
2348 for nname in inst.all_nodes:
2349 if self.all_node_info[nname].group != self.group_uuid:
2350 extra_lv_nodes.add(nname)
2352 unlocked_lv_nodes = \
2353 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2355 if unlocked_lv_nodes:
2356 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2357 utils.CommaJoin(unlocked_lv_nodes),
2359 self.extra_lv_nodes = list(extra_lv_nodes)
2361 def _VerifyNode(self, ninfo, nresult):
2362 """Perform some basic validation on data returned from a node.
2364 - check the result data structure is well formed and has all the mandatory fields
2366 - check ganeti version
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the results from the node
2372 @return: whether overall this call was successful (and we can expect
2373 reasonable values in the response)
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 # main result, nresult should be a non-empty dict
2380 test = not nresult or not isinstance(nresult, dict)
2381 _ErrorIf(test, constants.CV_ENODERPC, node,
2382 "unable to verify node: no data returned")
2386 # compares ganeti version
2387 local_version = constants.PROTOCOL_VERSION
2388 remote_version = nresult.get("version", None)
2389 test = not (remote_version and
2390 isinstance(remote_version, (list, tuple)) and
2391 len(remote_version) == 2)
2392 _ErrorIf(test, constants.CV_ENODERPC, node,
2393 "connection to node returned invalid data")
2397 test = local_version != remote_version[0]
2398 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2399 "incompatible protocol versions: master %s,"
2400 " node %s", local_version, remote_version[0])
2404 # node seems compatible, we can actually try to look into its results
2406 # full package version
2407 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2408 constants.CV_ENODEVERSION, node,
2409 "software version mismatch: master %s, node %s",
2410 constants.RELEASE_VERSION, remote_version[1],
2411 code=self.ETYPE_WARNING)
2413 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2414 if ninfo.vm_capable and isinstance(hyp_result, dict):
2415 for hv_name, hv_result in hyp_result.iteritems():
2416 test = hv_result is not None
2417 _ErrorIf(test, constants.CV_ENODEHV, node,
2418 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2420 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2421 if ninfo.vm_capable and isinstance(hvp_result, list):
2422 for item, hv_name, hv_result in hvp_result:
2423 _ErrorIf(True, constants.CV_ENODEHV, node,
2424 "hypervisor %s parameter verify failure (source %s): %s",
2425 hv_name, item, hv_result)
2427 test = nresult.get(constants.NV_NODESETUP,
2428 ["Missing NODESETUP results"])
2429 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2434 def _VerifyNodeTime(self, ninfo, nresult,
2435 nvinfo_starttime, nvinfo_endtime):
2436 """Check the node time.
2438 @type ninfo: L{objects.Node}
2439 @param ninfo: the node to check
2440 @param nresult: the remote results for the node
2441 @param nvinfo_starttime: the start time of the RPC call
2442 @param nvinfo_endtime: the end time of the RPC call
2446 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2448 ntime = nresult.get(constants.NV_TIME, None)
2450 ntime_merged = utils.MergeTime(ntime)
2451 except (ValueError, TypeError):
2452 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2455 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2456 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2457 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2458 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2462 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2463 "Node time diverges by at least %s from master node time",
2466 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2467 """Check the node LVM results and update info for cross-node checks.
2469 @type ninfo: L{objects.Node}
2470 @param ninfo: the node to check
2471 @param nresult: the remote results for the node
2472 @param vg_name: the configured VG name
2473 @type nimg: L{NodeImage}
2474 @param nimg: node image
2481 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2483 # checks vg existence and size > 20G
2484 vglist = nresult.get(constants.NV_VGLIST, None)
2486 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2488 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2489 constants.MIN_VG_SIZE)
2490 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2493 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2495 self._Error(constants.CV_ENODELVM, node, em)
2496 if pvminmax is not None:
2497 (nimg.pv_min, nimg.pv_max) = pvminmax
2499 def _VerifyGroupLVM(self, node_image, vg_name):
2500 """Check cross-node consistency in LVM.
2502 @type node_image: dict
2503 @param node_image: info about nodes, mapping from node to names to
2504 L{NodeImage} objects
2505 @param vg_name: the configured VG name
2511 # Only exclusive storage needs this kind of check
2512 if not self._exclusive_storage:
2515 # exclusive_storage wants all PVs to have the same size (approximately);
2516 # if the smallest and the biggest ones are okay, everything is fine.
2517 # pv_min is None iff pv_max is None
2518 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2521 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2522 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2523 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2524 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2525 "PV sizes differ too much in the group; smallest (%s MB) is"
2526 " on %s, biggest (%s MB) is on %s",
2527 pvmin, minnode, pvmax, maxnode)
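# Illustrative worked example (hypothetical sizes): with node A reporting
# pv_min=1024/pv_max=1024 MiB and node B reporting pv_min=1024/pv_max=4096
# MiB, the reductions above yield (pvmin, minnode) == (1024, "A") and
# (pvmax, maxnode) == (4096, "B"); LvmExclusiveTestBadPvSizes then decides
# whether a 1024..4096 spread is too wide for exclusive storage.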
2529 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2530 """Check the node bridges.
2532 @type ninfo: L{objects.Node}
2533 @param ninfo: the node to check
2534 @param nresult: the remote results for the node
2535 @param bridges: the expected list of bridges
2542 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2544 missing = nresult.get(constants.NV_BRIDGES, None)
2545 test = not isinstance(missing, list)
2546 _ErrorIf(test, constants.CV_ENODENET, node,
2547 "did not return valid bridge information")
2549 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2550 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2552 def _VerifyNodeUserScripts(self, ninfo, nresult):
2553 """Check the results of user scripts presence and executability on the node
2555 @type ninfo: L{objects.Node}
2556 @param ninfo: the node to check
2557 @param nresult: the remote results for the node
2562 test = not constants.NV_USERSCRIPTS in nresult
2563 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2564 "did not return user scripts information")
2566 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2568 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2569 "user scripts not present or not executable: %s" %
2570 utils.CommaJoin(sorted(broken_scripts)))
2572 def _VerifyNodeNetwork(self, ninfo, nresult):
2573 """Check the node network connectivity results.
2575 @type ninfo: L{objects.Node}
2576 @param ninfo: the node to check
2577 @param nresult: the remote results for the node
2581 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2583 test = constants.NV_NODELIST not in nresult
2584 _ErrorIf(test, constants.CV_ENODESSH, node,
2585 "node hasn't returned node ssh connectivity data")
2587 if nresult[constants.NV_NODELIST]:
2588 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2589 _ErrorIf(True, constants.CV_ENODESSH, node,
2590 "ssh communication with node '%s': %s", a_node, a_msg)
2592 test = constants.NV_NODENETTEST not in nresult
2593 _ErrorIf(test, constants.CV_ENODENET, node,
2594 "node hasn't returned node tcp connectivity data")
2596 if nresult[constants.NV_NODENETTEST]:
2597 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2599 _ErrorIf(True, constants.CV_ENODENET, node,
2600 "tcp communication with node '%s': %s",
2601 anode, nresult[constants.NV_NODENETTEST][anode])
2603 test = constants.NV_MASTERIP not in nresult
2604 _ErrorIf(test, constants.CV_ENODENET, node,
2605 "node hasn't returned node master IP reachability data")
2607 if not nresult[constants.NV_MASTERIP]:
2608 if node == self.master_node:
2609 msg = "the master node cannot reach the master IP (not configured?)"
2611 msg = "cannot reach the master IP"
2612 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2614 def _VerifyInstance(self, instance, inst_config, node_image,
2616 """Verify an instance.
2618 This function checks to see if the required block devices are
2619 available on the instance's node, and that the nodes are in the correct state.
2623 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2624 pnode = inst_config.primary_node
2625 pnode_img = node_image[pnode]
2626 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2628 node_vol_should = {}
2629 inst_config.MapLVsByNode(node_vol_should)
2631 cluster = self.cfg.GetClusterInfo()
2632 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2634 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2635 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2636 code=self.ETYPE_WARNING)
2638 for node in node_vol_should:
2639 n_img = node_image[node]
2640 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2641 # ignore missing volumes on offline or broken nodes
2643 for volume in node_vol_should[node]:
2644 test = volume not in n_img.volumes
2645 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2646 "volume %s missing on node %s", volume, node)
2648 if inst_config.admin_state == constants.ADMINST_UP:
2649 test = instance not in pnode_img.instances and not pnode_img.offline
2650 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2651 "instance not running on its primary node %s",
2653 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2654 "instance is marked as running and lives on offline node %s",
2657 diskdata = [(nname, success, status, idx)
2658 for (nname, disks) in diskstatus.items()
2659 for idx, (success, status) in enumerate(disks)]
2661 for nname, success, bdev_status, idx in diskdata:
2662 # the 'ghost node' construction in Exec() ensures that node_image has an entry for every node name used here
2664 snode = node_image[nname]
2665 bad_snode = snode.ghost or snode.offline
2666 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2667 not success and not bad_snode,
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "couldn't retrieve status for disk/%s on %s: %s",
2670 idx, nname, bdev_status)
2671 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2672 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2673 constants.CV_EINSTANCEFAULTYDISK, instance,
2674 "disk/%s on %s is faulty", idx, nname)
2676 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2677 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2678 " primary node failed", instance)
2680 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2681 constants.CV_EINSTANCELAYOUT,
2682 instance, "instance has multiple secondary nodes: %s",
2683 utils.CommaJoin(inst_config.secondary_nodes),
2684 code=self.ETYPE_WARNING)
2686 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2687 # Disk template not compatible with exclusive_storage: no instance
2688 # node should have the flag set
2689 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2690 inst_config.all_nodes)
2691 es_nodes = [n for (n, es) in es_flags.items()
2693 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2694 "instance has template %s, which is not supported on nodes"
2695 " that have exclusive storage set: %s",
2696 inst_config.disk_template, utils.CommaJoin(es_nodes))
2698 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2699 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2700 instance_groups = {}
2702 for node in instance_nodes:
2703 instance_groups.setdefault(self.all_node_info[node].group,
2707 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2708 # Sort so that we always list the primary node first.
2709 for group, nodes in sorted(instance_groups.items(),
2710 key=lambda (_, nodes): pnode in nodes,
2713 self._ErrorIf(len(instance_groups) > 1,
2714 constants.CV_EINSTANCESPLITGROUPS,
2715 instance, "instance has primary and secondary nodes in"
2716 " different groups: %s", utils.CommaJoin(pretty_list),
2717 code=self.ETYPE_WARNING)
2719 inst_nodes_offline = []
2720 for snode in inst_config.secondary_nodes:
2721 s_img = node_image[snode]
2722 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2723 snode, "instance %s, connection to secondary node failed",
2727 inst_nodes_offline.append(snode)
2729 # warn that the instance lives on offline nodes
2730 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2731 "instance has offline secondary node(s) %s",
2732 utils.CommaJoin(inst_nodes_offline))
2733 # ... or ghost/non-vm_capable nodes
2734 for node in inst_config.all_nodes:
2735 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2736 instance, "instance lives on ghost node %s", node)
2737 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2738 instance, "instance lives on non-vm_capable node %s", node)
2740 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2741 """Verify if there are any unknown volumes in the cluster.
2743 The .os, .swap and backup volumes are ignored. All other volumes are
2744 reported as unknown.
2746 @type reserved: L{ganeti.utils.FieldSet}
2747 @param reserved: a FieldSet of reserved volume names
2750 for node, n_img in node_image.items():
2751 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2752 self.all_node_info[node].group != self.group_uuid):
2753 # skip non-healthy nodes
2755 for volume in n_img.volumes:
2756 test = ((node not in node_vol_should or
2757 volume not in node_vol_should[node]) and
2758 not reserved.Matches(volume))
2759 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2760 "volume %s is unknown", volume)
2762 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2763 """Verify N+1 Memory Resilience.
2765 Check that if one single node dies we can still start all the
2766 instances it was primary for.
2769 cluster_info = self.cfg.GetClusterInfo()
2770 for node, n_img in node_image.items():
2771 # This code checks that every node which is now listed as
2772 # secondary has enough memory to host all instances it is
2773 # supposed to should a single other node in the cluster fail.
2774 # FIXME: not ready for failover to an arbitrary node
2775 # FIXME: does not support file-backed instances
2776 # WARNING: we currently take into account down instances as well
2777 # as up ones, considering that even if they're down someone
2778 # might want to start them even in the event of a node failure.
2779 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2780 # we're skipping nodes marked offline and nodes in other groups from
2781 # the N+1 warning, since most likely we don't have good memory
2782 # information from them; we already list instances living on such
2783 # nodes, and that's enough warning
2785 #TODO(dynmem): also consider ballooning out other instances
2786 for prinode, instances in n_img.sbp.items():
2788 for instance in instances:
2789 bep = cluster_info.FillBE(instance_cfg[instance])
2790 if bep[constants.BE_AUTO_BALANCE]:
2791 needed_mem += bep[constants.BE_MINMEM]
2792 test = n_img.mfree < needed_mem
2793 self._ErrorIf(test, constants.CV_ENODEN1, node,
2794 "not enough memory to accomodate instance failovers"
2795 " should node %s fail (%dMiB needed, %dMiB available)",
2796 prinode, needed_mem, n_img.mfree)
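# Illustrative worked example (hypothetical numbers): if this node is
# secondary for two auto-balanced instances of primary P with BE_MINMEM of
# 2048 and 1024 MiB, needed_mem == 3072; a reported mfree of 2560 MiB would
# then raise CV_ENODEN1 for a failover of P onto this node.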
2799 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2800 (files_all, files_opt, files_mc, files_vm)):
2801 """Verifies file checksums collected from all nodes.
2803 @param errorif: Callback for reporting errors
2804 @param nodeinfo: List of L{objects.Node} objects
2805 @param master_node: Name of master node
2806 @param all_nvinfo: RPC results
2809 # Define functions determining which nodes to consider for a file
2812 (files_mc, lambda node: (node.master_candidate or
2813 node.name == master_node)),
2814 (files_vm, lambda node: node.vm_capable),
2817 # Build mapping from filename to list of nodes which should have the file
2819 for (files, fn) in files2nodefn:
2821 filenodes = nodeinfo
2823 filenodes = filter(fn, nodeinfo)
2824 nodefiles.update((filename,
2825 frozenset(map(operator.attrgetter("name"), filenodes)))
2826 for filename in files)
2828 assert set(nodefiles) == (files_all | files_mc | files_vm)
2830 fileinfo = dict((filename, {}) for filename in nodefiles)
2831 ignore_nodes = set()
2833 for node in nodeinfo:
2835 ignore_nodes.add(node.name)
2838 nresult = all_nvinfo[node.name]
2840 if nresult.fail_msg or not nresult.payload:
2843 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2844 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2845 for (key, value) in fingerprints.items())
2848 test = not (node_files and isinstance(node_files, dict))
2849 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2850 "Node did not return file checksum data")
2852 ignore_nodes.add(node.name)
2855 # Build per-checksum mapping from filename to nodes having it
2856 for (filename, checksum) in node_files.items():
2857 assert filename in nodefiles
2858 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2860 for (filename, checksums) in fileinfo.items():
2861 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2863 # Nodes having the file
2864 with_file = frozenset(node_name
2865 for nodes in fileinfo[filename].values()
2866 for node_name in nodes) - ignore_nodes
2868 expected_nodes = nodefiles[filename] - ignore_nodes
2870 # Nodes missing file
2871 missing_file = expected_nodes - with_file
2873 if filename in files_opt:
2875 errorif(missing_file and missing_file != expected_nodes,
2876 constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is optional, but it must exist on all or no"
2878 " nodes (not found on %s)",
2879 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2881 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2882 "File %s is missing from node(s) %s", filename,
2883 utils.CommaJoin(utils.NiceSort(missing_file)))
2885 # Warn if a node has a file it shouldn't
2886 unexpected = with_file - expected_nodes
2888 constants.CV_ECLUSTERFILECHECK, None,
2889 "File %s should not exist on node(s) %s",
2890 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2892 # See if there are multiple versions of the file
2893 test = len(checksums) > 1
2895 variants = ["variant %s on %s" %
2896 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2897 for (idx, (checksum, nodes)) in
2898 enumerate(sorted(checksums.items()))]
2902 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2903 "File %s found with %s different checksums (%s)",
2904 filename, len(checksums), "; ".join(variants))
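# Illustrative sketch (hypothetical checksums): a fileinfo entry such as
#   {"abc123...": set(["node1", "node2"]), "def456...": set(["node3"])}
# produces "File X found with 2 different checksums (variant 1 on node1,
# node2; variant 2 on node3)".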
2906 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2908 """Verifies and the node DRBD status.
2910 @type ninfo: L{objects.Node}
2911 @param ninfo: the node to check
2912 @param nresult: the remote results for the node
2913 @param instanceinfo: the dict of instances
2914 @param drbd_helper: the configured DRBD usermode helper
2915 @param drbd_map: the DRBD map as returned by
2916 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2920 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2923 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2924 test = (helper_result is None)
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "no drbd usermode helper returned")
2928 status, payload = helper_result
2930 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2931 "drbd usermode helper check unsuccessful: %s", payload)
2932 test = status and (payload != drbd_helper)
2933 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2934 "wrong drbd usermode helper: %s", payload)
2936 # compute the DRBD minors
2938 for minor, instance in drbd_map[node].items():
2939 test = instance not in instanceinfo
2940 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2941 "ghost instance '%s' in temporary DRBD map", instance)
2942 # ghost instance should not be running, but otherwise we
2943 # don't give double warnings (both ghost instance and
2944 # unallocated minor in use)
2946 node_drbd[minor] = (instance, False)
2948 instance = instanceinfo[instance]
2949 node_drbd[minor] = (instance.name,
2950 instance.admin_state == constants.ADMINST_UP)
2952 # and now check them
2953 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2954 test = not isinstance(used_minors, (tuple, list))
2955 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2956 "cannot parse drbd status file: %s", str(used_minors))
2958 # we cannot check drbd status
2961 for minor, (iname, must_exist) in node_drbd.items():
2962 test = minor not in used_minors and must_exist
2963 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2964 "drbd minor %d of instance %s is not active", minor, iname)
2965 for minor in used_minors:
2966 test = minor not in node_drbd
2967 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2968 "unallocated drbd minor %d is in use", minor)
2970 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2971 """Builds the node OS structures.
2973 @type ninfo: L{objects.Node}
2974 @param ninfo: the node to check
2975 @param nresult: the remote results for the node
2976 @param nimg: the node image object
2980 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2982 remote_os = nresult.get(constants.NV_OSLIST, None)
2983 test = (not isinstance(remote_os, list) or
2984 not compat.all(isinstance(v, list) and len(v) == 7
2985 for v in remote_os))
2987 _ErrorIf(test, constants.CV_ENODEOS, node,
2988 "node hasn't returned valid OS data")
2997 for (name, os_path, status, diagnose,
2998 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3000 if name not in os_dict:
3003 # parameters is a list of lists instead of list of tuples due to
3004 # JSON lacking a real tuple type, fix it:
3005 parameters = [tuple(v) for v in parameters]
3006 os_dict[name].append((os_path, status, diagnose,
3007 set(variants), set(parameters), set(api_ver)))
3009 nimg.oslist = os_dict
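# Illustrative sketch of the structure built above (hypothetical OS): e.g.
#   nimg.oslist == {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                                    set(["default"]), set(), set([20]))]}
# i.e. OS name -> list of (path, status, diagnose, variants, parameters,
# api_versions) tuples, one per directory providing that OS; more than one
# entry means shadowed duplicates, which _VerifyNodeOS below reports.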
3011 def _VerifyNodeOS(self, ninfo, nimg, base):
3012 """Verifies the node OS list.
3014 @type ninfo: L{objects.Node}
3015 @param ninfo: the node to check
3016 @param nimg: the node image object
3017 @param base: the 'template' node we match against (e.g. from the master)
3021 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3023 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3025 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3026 for os_name, os_data in nimg.oslist.items():
3027 assert os_data, "Empty OS status for OS %s?!" % os_name
3028 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3029 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3030 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3031 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3032 "OS '%s' has multiple entries (first one shadows the rest): %s",
3033 os_name, utils.CommaJoin([v[0] for v in os_data]))
3034 # comparisons with the 'base' image
3035 test = os_name not in base.oslist
3036 _ErrorIf(test, constants.CV_ENODEOS, node,
3037 "Extra OS %s not present on reference node (%s)",
3041 assert base.oslist[os_name], "Base node has empty OS status?"
3042 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3044 # base OS is invalid, skipping
3046 for kind, a, b in [("API version", f_api, b_api),
3047 ("variants list", f_var, b_var),
3048 ("parameters", beautify_params(f_param),
3049 beautify_params(b_param))]:
3050 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3051 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3052 kind, os_name, base.name,
3053 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3055 # check any missing OSes
3056 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3057 _ErrorIf(missing, constants.CV_ENODEOS, node,
3058 "OSes present on reference node %s but missing on this node: %s",
3059 base.name, utils.CommaJoin(missing))
3061 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3062 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3064 @type ninfo: L{objects.Node}
3065 @param ninfo: the node to check
3066 @param nresult: the remote results for the node
3067 @type is_master: bool
3068 @param is_master: Whether node is the master node
3074 (constants.ENABLE_FILE_STORAGE or
3075 constants.ENABLE_SHARED_FILE_STORAGE)):
3077 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3079 # This should never happen
3080 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3081 "Node did not return forbidden file storage paths")
3083 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Found forbidden file storage paths: %s",
3085 utils.CommaJoin(fspaths))
3087 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3088 constants.CV_ENODEFILESTORAGEPATHS, node,
3089 "Node should not have returned forbidden file storage"
3092 def _VerifyOob(self, ninfo, nresult):
3093 """Verifies out of band functionality of a node.
3095 @type ninfo: L{objects.Node}
3096 @param ninfo: the node to check
3097 @param nresult: the remote results for the node
3101 # We just have to verify the paths on master and/or master candidates
3102 # as the oob helper is invoked on the master
3103 if ((ninfo.master_candidate or ninfo.master_capable) and
3104 constants.NV_OOB_PATHS in nresult):
3105 for path_result in nresult[constants.NV_OOB_PATHS]:
3106 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3108 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3109 """Verifies and updates the node volume data.
3111 This function will update a L{NodeImage}'s internal structures
3112 with data from the remote call.
3114 @type ninfo: L{objects.Node}
3115 @param ninfo: the node to check
3116 @param nresult: the remote results for the node
3117 @param nimg: the node image object
3118 @param vg_name: the configured VG name
3122 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3124 nimg.lvm_fail = True
3125 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3128 elif isinstance(lvdata, basestring):
3129 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3130 utils.SafeEncode(lvdata))
3131 elif not isinstance(lvdata, dict):
3132 _ErrorIf(True, constants.CV_ENODELVM, node,
3133 "rpc call to node failed (lvlist)")
3135 nimg.volumes = lvdata
3136 nimg.lvm_fail = False
3138 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3139 """Verifies and updates the node instance list.
3141 If the listing was successful, then updates this node's instance
3142 list. Otherwise, it marks the RPC call as failed for the instance
3145 @type ninfo: L{objects.Node}
3146 @param ninfo: the node to check
3147 @param nresult: the remote results for the node
3148 @param nimg: the node image object
3151 idata = nresult.get(constants.NV_INSTANCELIST, None)
3152 test = not isinstance(idata, list)
3153 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3154 "rpc call to node failed (instancelist): %s",
3155 utils.SafeEncode(str(idata)))
3157 nimg.hyp_fail = True
3159 nimg.instances = idata
3161 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3162 """Verifies and computes a node information map
3164 @type ninfo: L{objects.Node}
3165 @param ninfo: the node to check
3166 @param nresult: the remote results for the node
3167 @param nimg: the node image object
3168 @param vg_name: the configured VG name
3172 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3174 # try to read free memory (from the hypervisor)
3175 hv_info = nresult.get(constants.NV_HVINFO, None)
3176 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3177 _ErrorIf(test, constants.CV_ENODEHV, node,
3178 "rpc call to node failed (hvinfo)")
3181 nimg.mfree = int(hv_info["memory_free"])
3182 except (ValueError, TypeError):
3183 _ErrorIf(True, constants.CV_ENODERPC, node,
3184 "node returned invalid nodeinfo, check hypervisor")
3186 # FIXME: devise a free space model for file based instances as well
3187 if vg_name is not None:
3188 test = (constants.NV_VGLIST not in nresult or
3189 vg_name not in nresult[constants.NV_VGLIST])
3190 _ErrorIf(test, constants.CV_ENODELVM, node,
3191 "node didn't return data for the volume group '%s'"
3192 " - it is either missing or broken", vg_name)
3195 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3196 except (ValueError, TypeError):
3197 _ErrorIf(True, constants.CV_ENODERPC, node,
3198 "node returned invalid LVM info, check LVM status")
3200 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3201 """Gets per-disk status information for all instances.
3203 @type nodelist: list of strings
3204 @param nodelist: Node names
3205 @type node_image: dict of (name, L{objects.Node})
3206 @param node_image: Node objects
3207 @type instanceinfo: dict of (name, L{objects.Instance})
3208 @param instanceinfo: Instance objects
3209 @rtype: {instance: {node: [(success, payload)]}}
3210 @return: a dictionary of per-instance dictionaries with nodes as
3211 keys and disk information as values; the disk information is a
3212 list of tuples (success, payload)
3215 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3218 node_disks_devonly = {}
3219 diskless_instances = set()
3220 diskless = constants.DT_DISKLESS
3222 for nname in nodelist:
3223 node_instances = list(itertools.chain(node_image[nname].pinst,
3224 node_image[nname].sinst))
3225 diskless_instances.update(inst for inst in node_instances
3226 if instanceinfo[inst].disk_template == diskless)
3227 disks = [(inst, disk)
3228 for inst in node_instances
3229 for disk in instanceinfo[inst].disks]
3232 # No need to collect data
3235 node_disks[nname] = disks
3237 # _AnnotateDiskParams already makes copies of the disks
3239 for (inst, dev) in disks:
3240 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3241 self.cfg.SetDiskID(anno_disk, nname)
3242 devonly.append(anno_disk)
3244 node_disks_devonly[nname] = devonly
3246 assert len(node_disks) == len(node_disks_devonly)
3248 # Collect data from all nodes with disks
3249 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3252 assert len(result) == len(node_disks)
3256 for (nname, nres) in result.items():
3257 disks = node_disks[nname]
3260 # No data from this node
3261 data = len(disks) * [(False, "node offline")]
3264 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3265 "while getting disk information: %s", msg)
3267 # No data from this node
3268 data = len(disks) * [(False, msg)]
3271 for idx, i in enumerate(nres.payload):
3272 if isinstance(i, (tuple, list)) and len(i) == 2:
3275 logging.warning("Invalid result from node %s, entry %d: %s",
3277 data.append((False, "Invalid result from the remote node"))
3279 for ((inst, _), status) in zip(disks, data):
3280 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3282 # Add empty entries for diskless instances.
3283 for inst in diskless_instances:
3284 assert inst not in instdisk
3287 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3288 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3289 compat.all(isinstance(s, (tuple, list)) and
3290 len(s) == 2 for s in statuses)
3291 for inst, nnames in instdisk.items()
3292 for nname, statuses in nnames.items())
3294 instdisk_keys = set(instdisk)
3295 instanceinfo_keys = set(instanceinfo)
3296 assert instdisk_keys == instanceinfo_keys, \
3297 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3298 (instdisk_keys, instanceinfo_keys))
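# Illustrative sketch of the returned mapping (hypothetical names):
#   instdisk == {"inst1.example.com":
#                  {"node1.example.com": [(True, <status disk/0>),
#                                         (False, "node offline")]}}
# i.e. per-instance, per-node lists of (success, payload) pairs, plus one
# empty dict per diskless instance.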
3303 def _SshNodeSelector(group_uuid, all_nodes):
3304 """Create endless iterators for all potential SSH check hosts.
3307 nodes = [node for node in all_nodes
3308 if (node.group != group_uuid and
3310 keyfunc = operator.attrgetter("group")
3312 return map(itertools.cycle,
3313 [sorted(map(operator.attrgetter("name"), names))
3314 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3318 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3319 """Choose which nodes should talk to which other nodes.
3321 We will make nodes contact all nodes in their group, and one node from every other group.
3324 @warning: This algorithm has a known issue if one node group is much
3325 smaller than others (e.g. just one node). In such a case all other
3326 nodes will talk to the single node.
3329 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3330 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3332 return (online_nodes,
3333 dict((name, sorted([i.next() for i in sel]))
3334 for name in online_nodes))
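# Illustrative worked example (hypothetical layout): with groups
#   g1 = [A, B] (being verified), g2 = [C], g3 = [D, E]
# every online node of g1 SSH-checks all of g1 plus one node drawn
# round-robin from each other group, e.g. A -> [C, D] and B -> [C, E].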
3336 def BuildHooksEnv(self):
3339 Cluster-Verify hooks run only in the post phase; when they fail, their
3340 output is logged in the verify output and the verification fails.
3344 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3347 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3348 for node in self.my_node_info.values())
3352 def BuildHooksNodes(self):
3353 """Build hooks nodes.
3356 return ([], self.my_node_names)
3358 def Exec(self, feedback_fn):
3359 """Verify integrity of the node group, performing various test on nodes.
3362 # This method has too many local variables. pylint: disable=R0914
3363 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3365 if not self.my_node_names:
3367 feedback_fn("* Empty node group, skipping verification")
3371 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3372 verbose = self.op.verbose
3373 self._feedback_fn = feedback_fn
3375 vg_name = self.cfg.GetVGName()
3376 drbd_helper = self.cfg.GetDRBDHelper()
3377 cluster = self.cfg.GetClusterInfo()
3378 hypervisors = cluster.enabled_hypervisors
3379 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3381 i_non_redundant = [] # Non redundant instances
3382 i_non_a_balanced = [] # Non auto-balanced instances
3383 i_offline = 0 # Count of offline instances
3384 n_offline = 0 # Count of offline nodes
3385 n_drained = 0 # Count of nodes being drained
3386 node_vol_should = {}
3388 # FIXME: verify OS list
3391 filemap = _ComputeAncillaryFiles(cluster, False)
3393 # do local checksums
3394 master_node = self.master_node = self.cfg.GetMasterNode()
3395 master_ip = self.cfg.GetMasterIP()
3397 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3400 if self.cfg.GetUseExternalMipScript():
3401 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3403 node_verify_param = {
3404 constants.NV_FILELIST:
3405 map(vcluster.MakeVirtualPath,
3406 utils.UniqueSequence(filename
3407 for files in filemap
3408 for filename in files)),
3409 constants.NV_NODELIST:
3410 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3411 self.all_node_info.values()),
3412 constants.NV_HYPERVISOR: hypervisors,
3413 constants.NV_HVPARAMS:
3414 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3415 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3416 for node in node_data_list
3417 if not node.offline],
3418 constants.NV_INSTANCELIST: hypervisors,
3419 constants.NV_VERSION: None,
3420 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3421 constants.NV_NODESETUP: None,
3422 constants.NV_TIME: None,
3423 constants.NV_MASTERIP: (master_node, master_ip),
3424 constants.NV_OSLIST: None,
3425 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3426 constants.NV_USERSCRIPTS: user_scripts,
3429 if vg_name is not None:
3430 node_verify_param[constants.NV_VGLIST] = None
3431 node_verify_param[constants.NV_LVLIST] = vg_name
3432 node_verify_param[constants.NV_PVLIST] = [vg_name]
3435 node_verify_param[constants.NV_DRBDLIST] = None
3436 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3438 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3439 # Load file storage paths only from master node
3440 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3443 # FIXME: this needs to be changed per node-group, not cluster-wide
3445 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3446 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447 bridges.add(default_nicpp[constants.NIC_LINK])
3448 for instance in self.my_inst_info.values():
3449 for nic in instance.nics:
3450 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3451 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3452 bridges.add(full_nic[constants.NIC_LINK])
3455 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3457 # Build our expected cluster state
3458 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3460 vm_capable=node.vm_capable))
3461 for node in node_data_list)
3465 for node in self.all_node_info.values():
3466 path = _SupportsOob(self.cfg, node)
3467 if path and path not in oob_paths:
3468 oob_paths.append(path)
3471 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3473 for instance in self.my_inst_names:
3474 inst_config = self.my_inst_info[instance]
3475 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3478 for nname in inst_config.all_nodes:
3479 if nname not in node_image:
3480 gnode = self.NodeImage(name=nname)
3481 gnode.ghost = (nname not in self.all_node_info)
3482 node_image[nname] = gnode
3484 inst_config.MapLVsByNode(node_vol_should)
3486 pnode = inst_config.primary_node
3487 node_image[pnode].pinst.append(instance)
3489 for snode in inst_config.secondary_nodes:
3490 nimg = node_image[snode]
3491 nimg.sinst.append(instance)
3492 if pnode not in nimg.sbp:
3493 nimg.sbp[pnode] = []
3494 nimg.sbp[pnode].append(instance)
3496 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3497 # The value of exclusive_storage should be the same across the group, so if
3498 # it's True for at least one node, we act as if it were set for all nodes
3499 self._exclusive_storage = compat.any(es_flags.values())
3500 if self._exclusive_storage:
3501 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3503 # At this point, we have the in-memory data structures complete,
3504 # except for the runtime information, which we'll gather next
3506 # Due to the way our RPC system works, exact response times cannot be
3507 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3508 # time before and after executing the request, we can at least have a time window.
3510 nvinfo_starttime = time.time()
3511 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3513 self.cfg.GetClusterName())
3514 nvinfo_endtime = time.time()
3516 if self.extra_lv_nodes and vg_name is not None:
3518 self.rpc.call_node_verify(self.extra_lv_nodes,
3519 {constants.NV_LVLIST: vg_name},
3520 self.cfg.GetClusterName())
3522 extra_lv_nvinfo = {}
3524 all_drbd_map = self.cfg.ComputeDRBDMap()
3526 feedback_fn("* Gathering disk information (%s nodes)" %
3527 len(self.my_node_names))
3528 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3531 feedback_fn("* Verifying configuration file consistency")
3533 # If not all nodes are being checked, we need to make sure the master node
3534 # and a non-checked vm_capable node are in the list.
3535 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3537 vf_nvinfo = all_nvinfo.copy()
3538 vf_node_info = list(self.my_node_info.values())
3539 additional_nodes = []
3540 if master_node not in self.my_node_info:
3541 additional_nodes.append(master_node)
3542 vf_node_info.append(self.all_node_info[master_node])
3543 # Add the first vm_capable node we find which is not included,
3544 # excluding the master node (which we already have)
3545 for node in absent_nodes:
3546 nodeinfo = self.all_node_info[node]
3547 if (nodeinfo.vm_capable and not nodeinfo.offline and
3548 node != master_node):
3549 additional_nodes.append(node)
3550 vf_node_info.append(self.all_node_info[node])
3552 key = constants.NV_FILELIST
3553 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3554 {key: node_verify_param[key]},
3555 self.cfg.GetClusterName()))
3557 vf_nvinfo = all_nvinfo
3558 vf_node_info = self.my_node_info.values()
3560 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3562 feedback_fn("* Verifying node status")
3566 for node_i in node_data_list:
3568 nimg = node_image[node]
3572 feedback_fn("* Skipping offline node %s" % (node,))
3576 if node == master_node:
3578 elif node_i.master_candidate:
3579 ntype = "master candidate"
3580 elif node_i.drained:
3586 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3588 msg = all_nvinfo[node].fail_msg
3589 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3592 nimg.rpc_fail = True
3595 nresult = all_nvinfo[node].payload
3597 nimg.call_ok = self._VerifyNode(node_i, nresult)
3598 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3599 self._VerifyNodeNetwork(node_i, nresult)
3600 self._VerifyNodeUserScripts(node_i, nresult)
3601 self._VerifyOob(node_i, nresult)
3602 self._VerifyFileStoragePaths(node_i, nresult,
3603 node == master_node)
3606 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3607 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3610 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3611 self._UpdateNodeInstances(node_i, nresult, nimg)
3612 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3613 self._UpdateNodeOS(node_i, nresult, nimg)
3615 if not nimg.os_fail:
3616 if refos_img is None:
3618 self._VerifyNodeOS(node_i, nimg, refos_img)
3619 self._VerifyNodeBridges(node_i, nresult, bridges)
3621 # Check whether all running instances are primary for the node. (This
3622 # can no longer be done from _VerifyInstance below, since some of the
3623 # wrong instances could be from other node groups.)
3624 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3626 for inst in non_primary_inst:
3627 test = inst in self.all_inst_info
3628 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3629 "instance should not run on node %s", node_i.name)
3630 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3631 "node is running unknown instance %s", inst)
3633 self._VerifyGroupLVM(node_image, vg_name)
3635 for node, result in extra_lv_nvinfo.items():
3636 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3637 node_image[node], vg_name)
3639 feedback_fn("* Verifying instance status")
3640 for instance in self.my_inst_names:
3642 feedback_fn("* Verifying instance %s" % instance)
3643 inst_config = self.my_inst_info[instance]
3644 self._VerifyInstance(instance, inst_config, node_image,
3647 # If the instance is non-redundant we cannot survive losing its primary
3648 # node, so we are not N+1 compliant.
3649 if inst_config.disk_template not in constants.DTS_MIRRORED:
3650 i_non_redundant.append(instance)
3652 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3653 i_non_a_balanced.append(instance)
3655 feedback_fn("* Verifying orphan volumes")
3656 reserved = utils.FieldSet(*cluster.reserved_lvs)
3658 # We will get spurious "unknown volume" warnings if any node of this group
3659 # is secondary for an instance whose primary is in another group. To avoid
3660 # them, we find these instances and add their volumes to node_vol_should.
3661 for inst in self.all_inst_info.values():
3662 for secondary in inst.secondary_nodes:
3663 if (secondary in self.my_node_info
3664 and inst.name not in self.my_inst_info):
3665 inst.MapLVsByNode(node_vol_should)
3668 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3670 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3671 feedback_fn("* Verifying N+1 Memory redundancy")
3672 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3674 feedback_fn("* Other Notes")
3676 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3677 % len(i_non_redundant))
3679 if i_non_a_balanced:
3680 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3681 % len(i_non_a_balanced))
3684 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3687 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3690 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3694 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3695 """Analyze the post-hooks' result
3697 This method analyses the hook result, handles it, and sends some
3698 nicely-formatted feedback back to the user.
3700 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3701 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3702 @param hooks_results: the results of the multi-node hooks rpc call
3703 @param feedback_fn: function used to send feedback back to the caller
3704 @param lu_result: previous Exec result
3705 @return: the new Exec result, based on the previous result
3707 """
3709 # We only really run POST phase hooks, only for non-empty groups,
3710 # and are only interested in their results
3711 if not self.my_node_names:
3712 # empty node group
3713 pass
3714 elif phase == constants.HOOKS_PHASE_POST:
3715 # Used to change hooks' output to proper indentation
3716 feedback_fn("* Hooks Results")
3717 assert hooks_results, "invalid result from hooks"
3719 for node_name in hooks_results:
3720 res = hooks_results[node_name]
3721 msg = res.fail_msg
3722 test = msg and not res.offline
3723 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3724 "Communication failure in hooks execution: %s", msg)
3725 if res.offline or msg:
3726 # No need to investigate payload if node is offline or gave
3727 # an error message
3728 continue
3729 for script, hkr, output in res.payload:
3730 test = hkr == constants.HKR_FAIL
3731 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3732 "Script %s failed, output:", script)
3733 if test:
3734 output = self._HOOKS_INDENT_RE.sub(" ", output)
3735 feedback_fn("%s" % output)
3736 lu_result = False
3738 return lu_result
3741 class LUClusterVerifyDisks(NoHooksLU):
3742 """Verifies the cluster disks status.
3747 def ExpandNames(self):
3748 self.share_locks = _ShareAll()
3749 self.needed_locks = {
3750 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3751 }
3753 def Exec(self, feedback_fn):
3754 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3756 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3757 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3758 for group in group_names])
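# Editorial sketch (not in the original source): ResultWithJobs makes the
# processor submit one single-opcode job per node group, so with two
# hypothetical groups "default" and "storage" the return value above is
# equivalent to:
#
#   return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                          [opcodes.OpGroupVerifyDisks(group_name="storage")]])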
3761 class LUGroupVerifyDisks(NoHooksLU):
3762 """Verifies the status of all disks in a node group.
3767 def ExpandNames(self):
3768 # Raises errors.OpPrereqError on its own if group can't be found
3769 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3771 self.share_locks = _ShareAll()
3772 self.needed_locks = {
3773 locking.LEVEL_INSTANCE: [],
3774 locking.LEVEL_NODEGROUP: [],
3775 locking.LEVEL_NODE: [],
3777 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3778 # starts one instance of this opcode for every group, which means all
3779 # nodes will be locked for a short amount of time, so it's better to
3780 # acquire the node allocation lock as well.
3781 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3782 }
3784 def DeclareLocks(self, level):
3785 if level == locking.LEVEL_INSTANCE:
3786 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3788 # Lock instances optimistically, needs verification once node and group
3789 # locks have been acquired
3790 self.needed_locks[locking.LEVEL_INSTANCE] = \
3791 self.cfg.GetNodeGroupInstances(self.group_uuid)
3793 elif level == locking.LEVEL_NODEGROUP:
3794 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3796 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3797 set([self.group_uuid] +
3798 # Lock all groups used by instances optimistically; this requires
3799 # going via the node before it's locked, requiring verification
3800 # later on
3801 [group_uuid
3802 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3803 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3805 elif level == locking.LEVEL_NODE:
3806 # This will only lock the nodes in the group to be verified which contain
3807 # actual instances
3808 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3809 self._LockInstancesNodes()
3811 # Lock all nodes in group to be verified
3812 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3813 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3814 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3816 def CheckPrereq(self):
3817 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3818 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3819 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3821 assert self.group_uuid in owned_groups
3823 # Check if locked instances are still correct
3824 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3826 # Get instance information
3827 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3829 # Check if node groups for locked instances are still correct
3830 _CheckInstancesNodeGroups(self.cfg, self.instances,
3831 owned_groups, owned_nodes, self.group_uuid)
3833 def Exec(self, feedback_fn):
3834 """Verify integrity of cluster disks.
3836 @rtype: tuple of three items
3837 @return: a tuple of (dict of node-to-node_error, list of instances
3838 which need activate-disks, dict of instance: (node, volume) for
3839 missing volumes
3841 """
3842 res_nodes = {}
3843 res_instances = set()
3844 res_missing = {}
3846 nv_dict = _MapInstanceDisksToNodes(
3847 [inst for inst in self.instances.values()
3848 if inst.admin_state == constants.ADMINST_UP])
3850 if nv_dict:
3851 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3852 set(self.cfg.GetVmCapableNodeList()))
3854 node_lvs = self.rpc.call_lv_list(nodes, [])
3856 for (node, node_res) in node_lvs.items():
3857 if node_res.offline:
3858 continue
3860 msg = node_res.fail_msg
3861 if msg:
3862 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3863 res_nodes[node] = msg
3864 continue
3866 for lv_name, (_, _, lv_online) in node_res.payload.items():
3867 inst = nv_dict.pop((node, lv_name), None)
3868 if not (lv_online or inst is None):
3869 res_instances.add(inst)
3871 # any leftover items in nv_dict are missing LVs, let's arrange the data
3872 # better
3873 for key, inst in nv_dict.iteritems():
3874 res_missing.setdefault(inst, []).append(list(key))
3876 return (res_nodes, list(res_instances), res_missing)
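# Editorial sketch (invented names): a caller of this LU would consume the
# tuple returned above roughly as follows:
#
#   (node_errors, offline_disk_instances, missing_disks) = result
#   # node_errors: {"node1": "rpc failure ..."} - per-node RPC error strings
#   # offline_disk_instances: ["instance1"] - need "activate-disks"
#   # missing_disks: {"instance2": [["node2", "xenvg/disk0"]]} - LVs not found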
3879 class LUClusterRepairDiskSizes(NoHooksLU):
3880 """Verifies the cluster disks sizes.
3885 def ExpandNames(self):
3886 if self.op.instances:
3887 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3888 # Not getting the node allocation lock as only a specific set of
3889 # instances (and their nodes) is going to be acquired
3890 self.needed_locks = {
3891 locking.LEVEL_NODE_RES: [],
3892 locking.LEVEL_INSTANCE: self.wanted_names,
3893 }
3894 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3895 else:
3896 self.wanted_names = None
3897 self.needed_locks = {
3898 locking.LEVEL_NODE_RES: locking.ALL_SET,
3899 locking.LEVEL_INSTANCE: locking.ALL_SET,
3901 # This opcode acquires the node locks for all instances
3902 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3903 }
3905 self.share_locks = {
3906 locking.LEVEL_NODE_RES: 1,
3907 locking.LEVEL_INSTANCE: 0,
3908 locking.LEVEL_NODE_ALLOC: 1,
3909 }
3911 def DeclareLocks(self, level):
3912 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3913 self._LockInstancesNodes(primary_only=True, level=level)
3915 def CheckPrereq(self):
3916 """Check prerequisites.
3918 This only checks the optional instance list against the existing names.
3920 """
3921 if self.wanted_names is None:
3922 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3924 self.wanted_instances = \
3925 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3927 def _EnsureChildSizes(self, disk):
3928 """Ensure children of the disk have the needed disk size.
3930 This is valid mainly for DRBD8 and fixes an issue where the
3931 children have smaller disk size.
3933 @param disk: an L{ganeti.objects.Disk} object
3935 """
3936 if disk.dev_type == constants.LD_DRBD8:
3937 assert disk.children, "Empty children for DRBD8?"
3938 fchild = disk.children[0]
3939 mismatch = fchild.size < disk.size
3940 if mismatch:
3941 self.LogInfo("Child disk has size %d, parent %d, fixing",
3942 fchild.size, disk.size)
3943 fchild.size = disk.size
3945 # and we recurse on this child only, not on the metadev
3946 return self._EnsureChildSizes(fchild) or mismatch
3947 else:
3948 return False
3950 def Exec(self, feedback_fn):
3951 """Verify the size of cluster disks.
3954 # TODO: check child disks too
3955 # TODO: check differences in size between primary/secondary nodes
3957 for instance in self.wanted_instances:
3958 pnode = instance.primary_node
3959 if pnode not in per_node_disks:
3960 per_node_disks[pnode] = []
3961 for idx, disk in enumerate(instance.disks):
3962 per_node_disks[pnode].append((instance, idx, disk))
3964 assert not (frozenset(per_node_disks.keys()) -
3965 self.owned_locks(locking.LEVEL_NODE_RES)), \
3966 "Not owning correct locks"
3967 assert not self.owned_locks(locking.LEVEL_NODE)
3969 changed = []
3970 for node, dskl in per_node_disks.items():
3971 newl = [v[2].Copy() for v in dskl]
3972 for dsk in newl:
3973 self.cfg.SetDiskID(dsk, node)
3974 result = self.rpc.call_blockdev_getsize(node, newl)
3975 if result.fail_msg:
3976 self.LogWarning("Failure in blockdev_getsize call to node"
3977 " %s, ignoring", node)
3978 continue
3979 if len(result.payload) != len(dskl):
3980 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3981 " result.payload=%s", node, len(dskl), result.payload)
3982 self.LogWarning("Invalid result from node %s, ignoring node results",
3983 node)
3984 continue
3985 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3987 self.LogWarning("Disk %d of instance %s did not return size"
3988 " information, ignoring", idx, instance.name)
3990 if not isinstance(size, (int, long)):
3991 self.LogWarning("Disk %d of instance %s did not return valid"
3992 " size information, ignoring", idx, instance.name)
3995 if size != disk.size:
3996 self.LogInfo("Disk %d of instance %s has mismatched size,"
3997 " correcting: recorded %d, actual %d", idx,
3998 instance.name, disk.size, size)
3999 disk.size = size
4000 self.cfg.Update(instance, feedback_fn)
4001 changed.append((instance.name, idx, size))
4002 if self._EnsureChildSizes(disk):
4003 self.cfg.Update(instance, feedback_fn)
4004 changed.append((instance.name, idx, disk.size))
4006 return changed
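# Editorial note: call_blockdev_getsize reports sizes in bytes while the
# configuration stores disk sizes in MiB, hence the "size >> 20" above.
# Worked example: a 10737418240-byte device yields 10737418240 >> 20 == 10240,
# which is then compared against disk.size (10240 for a 10 GiB disk).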
4008 class LUClusterRename(LogicalUnit):
4009 """Rename the cluster.
4012 HPATH = "cluster-rename"
4013 HTYPE = constants.HTYPE_CLUSTER
4015 def BuildHooksEnv(self):
4016 """Build hooks env.
4018 """
4019 return {
4020 "OP_TARGET": self.cfg.GetClusterName(),
4021 "NEW_NAME": self.op.name,
4022 }
4024 def BuildHooksNodes(self):
4025 """Build hooks nodes.
4028 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4030 def CheckPrereq(self):
4031 """Verify that the passed name is a valid one.
4034 hostname = netutils.GetHostname(name=self.op.name,
4035 family=self.cfg.GetPrimaryIPFamily())
4037 new_name = hostname.name
4038 self.ip = new_ip = hostname.ip
4039 old_name = self.cfg.GetClusterName()
4040 old_ip = self.cfg.GetMasterIP()
4041 if new_name == old_name and new_ip == old_ip:
4042 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4043 " cluster has changed",
4045 if new_ip != old_ip:
4046 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4047 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4048 " reachable on the network" %
4049 new_ip, errors.ECODE_NOTUNIQUE)
4051 self.op.name = new_name
4053 def Exec(self, feedback_fn):
4054 """Rename the cluster.
4057 clustername = self.op.name
4060 # shutdown the master IP
4061 master_params = self.cfg.GetMasterNetworkParameters()
4062 ems = self.cfg.GetUseExternalMipScript()
4063 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4064 master_params, ems)
4065 result.Raise("Could not disable the master role")
4067 try:
4068 cluster = self.cfg.GetClusterInfo()
4069 cluster.cluster_name = clustername
4070 cluster.master_ip = new_ip
4071 self.cfg.Update(cluster, feedback_fn)
4073 # update the known hosts file
4074 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4075 node_list = self.cfg.GetOnlineNodeList()
4076 try:
4077 node_list.remove(master_params.name)
4078 except ValueError:
4079 pass
4080 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4081 finally:
4082 master_params.ip = new_ip
4083 result = self.rpc.call_node_activate_master_ip(master_params.name,
4084 master_params, ems)
4085 msg = result.fail_msg
4086 if msg:
4087 self.LogWarning("Could not re-enable the master role on"
4088 " the master, please restart manually: %s", msg)
4090 return clustername
4093 def _ValidateNetmask(cfg, netmask):
4094 """Checks if a netmask is valid.
4096 @type cfg: L{config.ConfigWriter}
4097 @param cfg: The cluster configuration
4098 @type netmask: int
4099 @param netmask: the netmask to be verified
4100 @raise errors.OpPrereqError: if the validation fails
4102 """
4103 ip_family = cfg.GetPrimaryIPFamily()
4104 try:
4105 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4106 except errors.ProgrammerError:
4107 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4108 ip_family, errors.ECODE_INVAL)
4109 if not ipcls.ValidateNetmask(netmask):
4110 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4111 (netmask), errors.ECODE_INVAL)
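# Hedged usage sketch (assuming an IPv4 cluster, where netmasks are prefix
# lengths): _ValidateNetmask(cfg, 24) returns silently, while
# _ValidateNetmask(cfg, 33) raises errors.OpPrereqError because the IPv4
# address class rejects prefixes outside the 0..32 range.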
4114 class LUClusterSetParams(LogicalUnit):
4115 """Change the parameters of the cluster.
4118 HPATH = "cluster-modify"
4119 HTYPE = constants.HTYPE_CLUSTER
4120 REQ_BGL = False
4122 def CheckArguments(self):
4123 """Check parameters
4125 """
4126 if self.op.uid_pool:
4127 uidpool.CheckUidPool(self.op.uid_pool)
4129 if self.op.add_uids:
4130 uidpool.CheckUidPool(self.op.add_uids)
4132 if self.op.remove_uids:
4133 uidpool.CheckUidPool(self.op.remove_uids)
4135 if self.op.master_netmask is not None:
4136 _ValidateNetmask(self.cfg, self.op.master_netmask)
4138 if self.op.diskparams:
4139 for dt_params in self.op.diskparams.values():
4140 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4141 try:
4142 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4143 except errors.OpPrereqError, err:
4144 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4145 errors.ECODE_INVAL)
4147 def ExpandNames(self):
4148 # FIXME: in the future maybe other cluster params won't require checking on
4149 # all nodes to be modified.
4150 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4151 # resource locks the right thing, shouldn't it be the BGL instead?
4152 self.needed_locks = {
4153 locking.LEVEL_NODE: locking.ALL_SET,
4154 locking.LEVEL_INSTANCE: locking.ALL_SET,
4155 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4156 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4157 }
4158 self.share_locks = _ShareAll()
4160 def BuildHooksEnv(self):
4161 """Build hooks env.
4163 """
4164 return {
4165 "OP_TARGET": self.cfg.GetClusterName(),
4166 "NEW_VG_NAME": self.op.vg_name,
4167 }
4169 def BuildHooksNodes(self):
4170 """Build hooks nodes.
4173 mn = self.cfg.GetMasterNode()
4176 def CheckPrereq(self):
4177 """Check prerequisites.
4179 This checks whether the given params don't conflict and
4180 if the given volume group is valid.
4182 """
4183 if self.op.vg_name is not None and not self.op.vg_name:
4184 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4185 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4186 " instances exist", errors.ECODE_INVAL)
4188 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4189 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4190 raise errors.OpPrereqError("Cannot disable drbd helper while"
4191 " drbd-based instances exist",
4194 node_list = self.owned_locks(locking.LEVEL_NODE)
4196 # if vg_name not None, checks given volume group on all nodes
4197 if self.op.vg_name:
4198 vglist = self.rpc.call_vg_list(node_list)
4199 for node in node_list:
4200 msg = vglist[node].fail_msg
4201 if msg:
4202 # ignoring down node
4203 self.LogWarning("Error while gathering data on node %s"
4204 " (ignoring node): %s", node, msg)
4205 continue
4206 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4207 self.op.vg_name,
4208 constants.MIN_VG_SIZE)
4209 if vgstatus:
4210 raise errors.OpPrereqError("Error on node '%s': %s" %
4211 (node, vgstatus), errors.ECODE_ENVIRON)
4213 if self.op.drbd_helper:
4214 # checks given drbd helper on all nodes
4215 helpers = self.rpc.call_drbd_helper(node_list)
4216 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4217 if ninfo.offline:
4218 self.LogInfo("Not checking drbd helper on offline node %s", node)
4219 continue
4220 msg = helpers[node].fail_msg
4221 if msg:
4222 raise errors.OpPrereqError("Error checking drbd helper on node"
4223 " '%s': %s" % (node, msg),
4224 errors.ECODE_ENVIRON)
4225 node_helper = helpers[node].payload
4226 if node_helper != self.op.drbd_helper:
4227 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4228 (node, node_helper), errors.ECODE_ENVIRON)
4230 self.cluster = cluster = self.cfg.GetClusterInfo()
4231 # validate params changes
4232 if self.op.beparams:
4233 objects.UpgradeBeParams(self.op.beparams)
4234 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4235 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4237 if self.op.ndparams:
4238 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4239 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4241 # TODO: we need a more general way to handle resetting
4242 # cluster-level parameters to default values
4243 if self.new_ndparams["oob_program"] == "":
4244 self.new_ndparams["oob_program"] = \
4245 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4247 if self.op.hv_state:
4248 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4249 self.cluster.hv_state_static)
4250 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4251 for hv, values in new_hv_state.items())
4253 if self.op.disk_state:
4254 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4255 self.cluster.disk_state_static)
4256 self.new_disk_state = \
4257 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4258 for name, values in svalues.items()))
4259 for storage, svalues in new_disk_state.items())
4261 if self.op.ipolicy:
4262 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4263 group_policy=False)
4265 all_instances = self.cfg.GetAllInstancesInfo().values()
4266 violations = set()
4267 for group in self.cfg.GetAllNodeGroupsInfo().values():
4268 instances = frozenset([inst for inst in all_instances
4269 if compat.any(node in group.members
4270 for node in inst.all_nodes)])
4271 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4272 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4273 new = _ComputeNewInstanceViolations(ipol,
4274 new_ipolicy, instances)
4276 violations.update(new)
4278 if violations:
4279 self.LogWarning("After the ipolicy change the following instances"
4280 " violate them: %s",
4281 utils.CommaJoin(utils.NiceSort(violations)))
4283 if self.op.nicparams:
4284 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4285 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4286 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4287 nic_errors = []
4289 # check all instances for consistency
4290 for instance in self.cfg.GetAllInstancesInfo().values():
4291 for nic_idx, nic in enumerate(instance.nics):
4292 params_copy = copy.deepcopy(nic.nicparams)
4293 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4295 # check parameter syntax
4296 try:
4297 objects.NIC.CheckParameterSyntax(params_filled)
4298 except errors.ConfigurationError, err:
4299 nic_errors.append("Instance %s, nic/%d: %s" %
4300 (instance.name, nic_idx, err))
4302 # if we're moving instances to routed, check that they have an ip
4303 target_mode = params_filled[constants.NIC_MODE]
4304 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4305 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4306 " address" % (instance.name, nic_idx))
4308 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4309 "\n".join(nic_errors), errors.ECODE_INVAL)
4311 # hypervisor list/parameters
4312 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4313 if self.op.hvparams:
4314 for hv_name, hv_dict in self.op.hvparams.items():
4315 if hv_name not in self.new_hvparams:
4316 self.new_hvparams[hv_name] = hv_dict
4317 else:
4318 self.new_hvparams[hv_name].update(hv_dict)
4320 # disk template parameters
4321 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4322 if self.op.diskparams:
4323 for dt_name, dt_params in self.op.diskparams.items():
4324 if dt_name not in self.new_diskparams:
4325 self.new_diskparams[dt_name] = dt_params
4326 else:
4327 self.new_diskparams[dt_name].update(dt_params)
4329 # os hypervisor parameters
4330 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4331 if self.op.os_hvp:
4332 for os_name, hvs in self.op.os_hvp.items():
4333 if os_name not in self.new_os_hvp:
4334 self.new_os_hvp[os_name] = hvs
4335 else:
4336 for hv_name, hv_dict in hvs.items():
4337 if hv_dict is None:
4338 # Delete if it exists
4339 self.new_os_hvp[os_name].pop(hv_name, None)
4340 elif hv_name not in self.new_os_hvp[os_name]:
4341 self.new_os_hvp[os_name][hv_name] = hv_dict
4342 else:
4343 self.new_os_hvp[os_name][hv_name].update(hv_dict)
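# Editorial sketch of the merge semantics above (hypothetical values):
# with cluster.os_hvp == {"lenny": {"xen-pvm": {"kernel_path": "/boot/k"}}}
# and self.op.os_hvp == {"lenny": {"xen-pvm": {"root_path": "/dev/xvda"}}},
# new_os_hvp["lenny"]["xen-pvm"] ends up as
# {"kernel_path": "/boot/k", "root_path": "/dev/xvda"}; passing None as a
# hypervisor's dict instead deletes that hypervisor's per-OS overrides.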
4345 # os parameters
4346 self.new_osp = objects.FillDict(cluster.osparams, {})
4347 if self.op.osparams:
4348 for os_name, osp in self.op.osparams.items():
4349 if os_name not in self.new_osp:
4350 self.new_osp[os_name] = {}
4352 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4353 use_none=True)
4355 if not self.new_osp[os_name]:
4356 # we removed all parameters
4357 del self.new_osp[os_name]
4358 else:
4359 # check the parameter validity (remote check)
4360 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4361 os_name, self.new_osp[os_name])
4363 # changes to the hypervisor list
4364 if self.op.enabled_hypervisors is not None:
4365 self.hv_list = self.op.enabled_hypervisors
4366 for hv in self.hv_list:
4367 # if the hypervisor doesn't already exist in the cluster
4368 # hvparams, we initialize it to empty, and then (in both
4369 # cases) we make sure to fill the defaults, as we might not
4370 # have a complete defaults list if the hypervisor wasn't
4371 # enabled before
4372 if hv not in new_hvp:
4373 new_hvp[hv] = {}
4374 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4375 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4376 else:
4377 self.hv_list = cluster.enabled_hypervisors
4379 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4380 # either the enabled list has changed, or the parameters have, validate
4381 for hv_name, hv_params in self.new_hvparams.items():
4382 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4383 (self.op.enabled_hypervisors and
4384 hv_name in self.op.enabled_hypervisors)):
4385 # either this is a new hypervisor, or its parameters have changed
4386 hv_class = hypervisor.GetHypervisorClass(hv_name)
4387 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4388 hv_class.CheckParameterSyntax(hv_params)
4389 _CheckHVParams(self, node_list, hv_name, hv_params)
4391 if self.op.os_hvp:
4392 # no need to check any newly-enabled hypervisors, since the
4393 # defaults have already been checked in the above code-block
4394 for os_name, os_hvp in self.new_os_hvp.items():
4395 for hv_name, hv_params in os_hvp.items():
4396 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4397 # we need to fill in the new os_hvp on top of the actual hv_p
4398 cluster_defaults = self.new_hvparams.get(hv_name, {})
4399 new_osp = objects.FillDict(cluster_defaults, hv_params)
4400 hv_class = hypervisor.GetHypervisorClass(hv_name)
4401 hv_class.CheckParameterSyntax(new_osp)
4402 _CheckHVParams(self, node_list, hv_name, new_osp)
4404 if self.op.default_iallocator:
4405 alloc_script = utils.FindFile(self.op.default_iallocator,
4406 constants.IALLOCATOR_SEARCH_PATH,
4407 os.path.isfile)
4408 if alloc_script is None:
4409 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4410 " specified" % self.op.default_iallocator,
4411 errors.ECODE_INVAL)
4413 def Exec(self, feedback_fn):
4414 """Change the parameters of the cluster.
4417 if self.op.vg_name is not None:
4418 new_volume = self.op.vg_name
4419 if not new_volume:
4420 new_volume = None
4421 if new_volume != self.cfg.GetVGName():
4422 self.cfg.SetVGName(new_volume)
4423 else:
4424 feedback_fn("Cluster LVM configuration already in desired"
4425 " state, not changing")
4426 if self.op.drbd_helper is not None:
4427 new_helper = self.op.drbd_helper
4428 if not new_helper:
4429 new_helper = None
4430 if new_helper != self.cfg.GetDRBDHelper():
4431 self.cfg.SetDRBDHelper(new_helper)
4432 else:
4433 feedback_fn("Cluster DRBD helper already in desired state,"
4434 " not changing")
4435 if self.op.hvparams:
4436 self.cluster.hvparams = self.new_hvparams
4437 if self.op.os_hvp:
4438 self.cluster.os_hvp = self.new_os_hvp
4439 if self.op.enabled_hypervisors is not None:
4440 self.cluster.hvparams = self.new_hvparams
4441 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4442 if self.op.beparams:
4443 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4444 if self.op.nicparams:
4445 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4446 if self.op.ipolicy:
4447 self.cluster.ipolicy = self.new_ipolicy
4448 if self.op.osparams:
4449 self.cluster.osparams = self.new_osp
4450 if self.op.ndparams:
4451 self.cluster.ndparams = self.new_ndparams
4452 if self.op.diskparams:
4453 self.cluster.diskparams = self.new_diskparams
4454 if self.op.hv_state:
4455 self.cluster.hv_state_static = self.new_hv_state
4456 if self.op.disk_state:
4457 self.cluster.disk_state_static = self.new_disk_state
4459 if self.op.candidate_pool_size is not None:
4460 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4461 # we need to update the pool size here, otherwise the save will fail
4462 _AdjustCandidatePool(self, [])
4464 if self.op.maintain_node_health is not None:
4465 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4466 feedback_fn("Note: CONFD was disabled at build time, node health"
4467 " maintenance is not useful (still enabling it)")
4468 self.cluster.maintain_node_health = self.op.maintain_node_health
4470 if self.op.prealloc_wipe_disks is not None:
4471 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4473 if self.op.add_uids is not None:
4474 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4476 if self.op.remove_uids is not None:
4477 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4479 if self.op.uid_pool is not None:
4480 self.cluster.uid_pool = self.op.uid_pool
4482 if self.op.default_iallocator is not None:
4483 self.cluster.default_iallocator = self.op.default_iallocator
4485 if self.op.reserved_lvs is not None:
4486 self.cluster.reserved_lvs = self.op.reserved_lvs
4488 if self.op.use_external_mip_script is not None:
4489 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4491 def helper_os(aname, mods, desc):
4492 desc += " OS list"
4493 lst = getattr(self.cluster, aname)
4494 for key, val in mods:
4495 if key == constants.DDM_ADD:
4496 if val in lst:
4497 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4498 else:
4499 lst.append(val)
4500 elif key == constants.DDM_REMOVE:
4501 if val in lst:
4502 lst.remove(val)
4503 else:
4504 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4505 else:
4506 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4508 if self.op.hidden_os:
4509 helper_os("hidden_os", self.op.hidden_os, "hidden")
4511 if self.op.blacklisted_os:
4512 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4514 if self.op.master_netdev:
4515 master_params = self.cfg.GetMasterNetworkParameters()
4516 ems = self.cfg.GetUseExternalMipScript()
4517 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4518 self.cluster.master_netdev)
4519 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4520 master_params, ems)
4521 result.Raise("Could not disable the master ip")
4522 feedback_fn("Changing master_netdev from %s to %s" %
4523 (master_params.netdev, self.op.master_netdev))
4524 self.cluster.master_netdev = self.op.master_netdev
4526 if self.op.master_netmask:
4527 master_params = self.cfg.GetMasterNetworkParameters()
4528 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4529 result = self.rpc.call_node_change_master_netmask(master_params.name,
4530 master_params.netmask,
4531 self.op.master_netmask,
4532 master_params.ip,
4533 master_params.netdev)
4534 if result.fail_msg:
4535 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4536 feedback_fn(msg)
4538 self.cluster.master_netmask = self.op.master_netmask
4540 self.cfg.Update(self.cluster, feedback_fn)
4542 if self.op.master_netdev:
4543 master_params = self.cfg.GetMasterNetworkParameters()
4544 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4545 self.op.master_netdev)
4546 ems = self.cfg.GetUseExternalMipScript()
4547 result = self.rpc.call_node_activate_master_ip(master_params.name,
4548 master_params, ems)
4549 if result.fail_msg:
4550 self.LogWarning("Could not re-enable the master ip on"
4551 " the master, please restart manually: %s",
4552 result.fail_msg)
4555 def _UploadHelper(lu, nodes, fname):
4556 """Helper for uploading a file and showing warnings.
4559 if os.path.exists(fname):
4560 result = lu.rpc.call_upload_file(nodes, fname)
4561 for to_node, to_result in result.items():
4562 msg = to_result.fail_msg
4564 msg = ("Copy of file %s to node %s failed: %s" %
4565 (fname, to_node, msg))
4569 def _ComputeAncillaryFiles(cluster, redist):
4570 """Compute files external to Ganeti which need to be consistent.
4572 @type redist: boolean
4573 @param redist: Whether to include files which need to be redistributed
4575 """
4576 # Compute files for all nodes
4577 files_all = set([
4578 pathutils.SSH_KNOWN_HOSTS_FILE,
4579 pathutils.CONFD_HMAC_KEY,
4580 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4581 pathutils.SPICE_CERT_FILE,
4582 pathutils.SPICE_CACERT_FILE,
4583 pathutils.RAPI_USERS_FILE,
4584 ])
4586 if redist:
4587 # we need to ship at least the RAPI certificate
4588 files_all.add(pathutils.RAPI_CERT_FILE)
4589 else:
4590 files_all.update(pathutils.ALL_CERT_FILES)
4591 files_all.update(ssconf.SimpleStore().GetFileList())
4593 if cluster.modify_etc_hosts:
4594 files_all.add(pathutils.ETC_HOSTS)
4596 if cluster.use_external_mip_script:
4597 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4599 # Files which are optional, these must:
4600 # - be present in one other category as well
4601 # - either exist or not exist on all nodes of that category (mc, vm all)
4602 files_opt = set([
4603 pathutils.RAPI_USERS_FILE,
4604 ])
4606 # Files which should only be on master candidates
4607 files_mc = set()
4609 if not redist:
4610 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4612 # File storage
4613 if (not redist and
4614 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4615 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4616 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4618 # Files which should only be on VM-capable nodes
4619 files_vm = set(
4620 filename
4621 for hv_name in cluster.enabled_hypervisors
4622 for filename in
4623 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4625 files_opt |= set(
4626 filename
4627 for hv_name in cluster.enabled_hypervisors
4628 for filename in
4629 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4631 # Filenames in each category must be unique
4632 all_files_set = files_all | files_mc | files_vm
4633 assert (len(all_files_set) ==
4634 sum(map(len, [files_all, files_mc, files_vm]))), \
4635 "Found file listed in more than one file list"
4637 # Optional files must be present in one other category
4638 assert all_files_set.issuperset(files_opt), \
4639 "Optional file not in a different required list"
4641 # This one file should never ever be re-distributed via RPC
4642 assert not (redist and
4643 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4645 return (files_all, files_opt, files_mc, files_vm)
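# Hedged usage sketch: callers unpack the four categories computed above,
# e.g. (as _RedistributeAncillaryFiles below does for redistribution):
#
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
#
# With redist=True the cluster configuration file is never part of any
# category, which the preceding assertion enforces.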
4648 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4649 """Distribute additional files which are part of the cluster configuration.
4651 ConfigWriter takes care of distributing the config and ssconf files, but
4652 there are more files which should be distributed to all nodes. This function
4653 makes sure those are copied.
4655 @param lu: calling logical unit
4656 @param additional_nodes: list of nodes not in the config to distribute to
4657 @type additional_vm: boolean
4658 @param additional_vm: whether the additional nodes are vm-capable or not
4660 """
4661 # Gather target nodes
4662 cluster = lu.cfg.GetClusterInfo()
4663 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4665 online_nodes = lu.cfg.GetOnlineNodeList()
4666 online_set = frozenset(online_nodes)
4667 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4669 if additional_nodes is not None:
4670 online_nodes.extend(additional_nodes)
4671 if additional_vm:
4672 vm_nodes.extend(additional_nodes)
4674 # Never distribute to master node
4675 for nodelist in [online_nodes, vm_nodes]:
4676 if master_info.name in nodelist:
4677 nodelist.remove(master_info.name)
4680 (files_all, _, files_mc, files_vm) = \
4681 _ComputeAncillaryFiles(cluster, True)
4683 # Never re-distribute configuration file from here
4684 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4685 pathutils.CLUSTER_CONF_FILE in files_vm)
4686 assert not files_mc, "Master candidates not handled in this function"
4688 filemap = [
4689 (online_nodes, files_all),
4690 (vm_nodes, files_vm),
4691 ]
4693 # Upload the files
4694 for (node_list, files) in filemap:
4695 for fname in files:
4696 _UploadHelper(lu, node_list, fname)
4699 class LUClusterRedistConf(NoHooksLU):
4700 """Force the redistribution of cluster configuration.
4702 This is a very simple LU.
4704 """
4705 REQ_BGL = False
4707 def ExpandNames(self):
4708 self.needed_locks = {
4709 locking.LEVEL_NODE: locking.ALL_SET,
4710 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4711 }
4712 self.share_locks = _ShareAll()
4714 def Exec(self, feedback_fn):
4715 """Redistribute the configuration.
4718 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4719 _RedistributeAncillaryFiles(self)
4722 class LUClusterActivateMasterIp(NoHooksLU):
4723 """Activate the master IP on the master node.
4726 def Exec(self, feedback_fn):
4727 """Activate the master IP.
4730 master_params = self.cfg.GetMasterNetworkParameters()
4731 ems = self.cfg.GetUseExternalMipScript()
4732 result = self.rpc.call_node_activate_master_ip(master_params.name,
4734 result.Raise("Could not activate the master IP")
4737 class LUClusterDeactivateMasterIp(NoHooksLU):
4738 """Deactivate the master IP on the master node.
4741 def Exec(self, feedback_fn):
4742 """Deactivate the master IP.
4745 master_params = self.cfg.GetMasterNetworkParameters()
4746 ems = self.cfg.GetUseExternalMipScript()
4747 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4749 result.Raise("Could not deactivate the master IP")
4752 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4753 """Sleep and poll for an instance's disk to sync.
4756 if not instance.disks or disks is not None and not disks:
4759 disks = _ExpandCheckDisks(instance, disks)
4762 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4764 node = instance.primary_node
4767 lu.cfg.SetDiskID(dev, node)
4769 # TODO: Convert to utils.Retry
4771 retries = 0
4772 degr_retries = 10 # in seconds, as we sleep 1 second each time
4773 while True:
4774 max_time = 0
4775 done = True
4776 cumul_degraded = False
4777 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4778 msg = rstats.fail_msg
4779 if msg:
4780 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4781 retries += 1
4782 if retries >= 10:
4783 raise errors.RemoteError("Can't contact node %s for mirror data,"
4784 " aborting." % node)
4785 time.sleep(6)
4786 continue
4787 rstats = rstats.payload
4789 for i, mstat in enumerate(rstats):
4790 if mstat is None:
4791 lu.LogWarning("Can't compute data for node %s/%s",
4792 node, disks[i].iv_name)
4793 continue
4795 cumul_degraded = (cumul_degraded or
4796 (mstat.is_degraded and mstat.sync_percent is None))
4797 if mstat.sync_percent is not None:
4798 done = False
4799 if mstat.estimated_time is not None:
4800 rem_time = ("%s remaining (estimated)" %
4801 utils.FormatSeconds(mstat.estimated_time))
4802 max_time = mstat.estimated_time
4803 else:
4804 rem_time = "no time estimate"
4805 lu.LogInfo("- device %s: %5.2f%% done, %s",
4806 disks[i].iv_name, mstat.sync_percent, rem_time)
4808 # if we're done but degraded, let's do a few small retries, to
4809 # make sure we see a stable and not transient situation; therefore
4810 # we force restart of the loop
4811 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4812 logging.info("Degraded disks found, %d retries left", degr_retries)
4813 degr_retries -= 1
4814 time.sleep(1)
4815 continue
4817 if done or oneshot:
4818 break
4820 time.sleep(min(60, max_time))
4822 if not oneshot:
4823 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4825 return not cumul_degraded
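# Hedged usage sketch (not a quote of a real caller): an LU typically waits
# for DRBD resync after creating disks and treats a persistently degraded
# mirror as an error:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)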
4828 def _BlockdevFind(lu, node, dev, instance):
4829 """Wrapper around call_blockdev_find to annotate diskparams.
4831 @param lu: A reference to the lu object
4832 @param node: The node to call out
4833 @param dev: The device to find
4834 @param instance: The instance object the device belongs to
4835 @return: The result of the rpc call
4837 """
4838 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4839 return lu.rpc.call_blockdev_find(node, disk)
4842 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4843 """Wrapper around L{_CheckDiskConsistencyInner}.
4846 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4847 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4848 ldisk=ldisk)
4851 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4852 ldisk=False):
4853 """Check that mirrors are not degraded.
4855 @attention: The device has to be annotated already.
4857 The ldisk parameter, if True, will change the test from the
4858 is_degraded attribute (which represents overall non-ok status for
4859 the device(s)) to the ldisk (representing the local storage status).
4861 """
4862 lu.cfg.SetDiskID(dev, node)
4864 result = True
4866 if on_primary or dev.AssembleOnSecondary():
4867 rstats = lu.rpc.call_blockdev_find(node, dev)
4868 msg = rstats.fail_msg
4869 if msg:
4870 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4871 result = False
4872 elif not rstats.payload:
4873 lu.LogWarning("Can't find disk on node %s", node)
4874 result = False
4875 else:
4876 if ldisk:
4877 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4878 else:
4879 result = result and not rstats.payload.is_degraded
4881 if dev.children:
4882 for child in dev.children:
4883 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4884 on_primary)
4886 return result
4889 class LUOobCommand(NoHooksLU):
4890 """Logical unit for OOB handling.
4894 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4896 def ExpandNames(self):
4897 """Gather locks we need.
4900 if self.op.node_names:
4901 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4902 lock_names = self.op.node_names
4904 lock_names = locking.ALL_SET
4906 self.needed_locks = {
4907 locking.LEVEL_NODE: lock_names,
4908 }
4910 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4912 if not self.op.node_names:
4913 # Acquire node allocation lock only if all nodes are affected
4914 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4916 def CheckPrereq(self):
4917 """Check prerequisites.
4920 - the node exists in the configuration
4923 Any errors are signaled by raising errors.OpPrereqError.
4927 self.master_node = self.cfg.GetMasterNode()
4929 assert self.op.power_delay >= 0.0
4931 if self.op.node_names:
4932 if (self.op.command in self._SKIP_MASTER and
4933 self.master_node in self.op.node_names):
4934 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4935 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4937 if master_oob_handler:
4938 additional_text = ("run '%s %s %s' if you want to operate on the"
4939 " master regardless") % (master_oob_handler,
4943 additional_text = "it does not support out-of-band operations"
4945 raise errors.OpPrereqError(("Operating on the master node %s is not"
4946 " allowed for %s; %s") %
4947 (self.master_node, self.op.command,
4948 additional_text), errors.ECODE_INVAL)
4949 else:
4950 self.op.node_names = self.cfg.GetNodeList()
4951 if self.op.command in self._SKIP_MASTER:
4952 self.op.node_names.remove(self.master_node)
4954 if self.op.command in self._SKIP_MASTER:
4955 assert self.master_node not in self.op.node_names
4957 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4958 if node is None:
4959 raise errors.OpPrereqError("Node %s not found" % node_name,
4960 errors.ECODE_NOENT)
4961 else:
4962 self.nodes.append(node)
4964 if (not self.op.ignore_status and
4965 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4966 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4967 " not marked offline") % node_name,
4970 def Exec(self, feedback_fn):
4971 """Execute OOB and return result if we expect any.
4974 master_node = self.master_node
4977 for idx, node in enumerate(utils.NiceSort(self.nodes,
4978 key=lambda node: node.name)):
4979 node_entry = [(constants.RS_NORMAL, node.name)]
4980 ret.append(node_entry)
4982 oob_program = _SupportsOob(self.cfg, node)
4984 if not oob_program:
4985 node_entry.append((constants.RS_UNAVAIL, None))
4986 continue
4988 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4989 self.op.command, oob_program, node.name)
4990 result = self.rpc.call_run_oob(master_node, oob_program,
4991 self.op.command, node.name,
4992 self.op.timeout)
4994 if result.fail_msg:
4995 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4996 node.name, result.fail_msg)
4997 node_entry.append((constants.RS_NODATA, None))
4998 else:
4999 try:
5000 self._CheckPayload(result)
5001 except errors.OpExecError, err:
5002 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5004 node_entry.append((constants.RS_NODATA, None))
5006 if self.op.command == constants.OOB_HEALTH:
5007 # For health we should log important events
5008 for item, status in result.payload:
5009 if status in [constants.OOB_STATUS_WARNING,
5010 constants.OOB_STATUS_CRITICAL]:
5011 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5012 item, node.name, status)
5014 if self.op.command == constants.OOB_POWER_ON:
5015 node.powered = True
5016 elif self.op.command == constants.OOB_POWER_OFF:
5017 node.powered = False
5018 elif self.op.command == constants.OOB_POWER_STATUS:
5019 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5020 if powered != node.powered:
5021 logging.warning(("Recorded power state (%s) of node '%s' does not"
5022 " match actual power state (%s)"), node.powered,
5025 # For configuration changing commands we should update the node
5026 if self.op.command in (constants.OOB_POWER_ON,
5027 constants.OOB_POWER_OFF):
5028 self.cfg.Update(node, feedback_fn)
5030 node_entry.append((constants.RS_NORMAL, result.payload))
5032 if (self.op.command == constants.OOB_POWER_ON and
5033 idx < len(self.nodes) - 1):
5034 time.sleep(self.op.power_delay)
5036 return ret
5038 def _CheckPayload(self, result):
5039 """Checks if the payload is valid.
5041 @param result: RPC result
5042 @raises errors.OpExecError: If payload is not valid
5044 """
5045 errs = []
5046 if self.op.command == constants.OOB_HEALTH:
5047 if not isinstance(result.payload, list):
5048 errs.append("command 'health' is expected to return a list but got %s" %
5049 type(result.payload))
5050 else:
5051 for item, status in result.payload:
5052 if status not in constants.OOB_STATUSES:
5053 errs.append("health item '%s' has invalid status '%s'" %
5056 if self.op.command == constants.OOB_POWER_STATUS:
5057 if not isinstance(result.payload, dict):
5058 errs.append("power-status is expected to return a dict but got %s" %
5059 type(result.payload))
5061 if self.op.command in [
5062 constants.OOB_POWER_ON,
5063 constants.OOB_POWER_OFF,
5064 constants.OOB_POWER_CYCLE,
5065 ]:
5066 if result.payload is not None:
5067 errs.append("%s is expected to not return payload but got '%s'" %
5068 (self.op.command, result.payload))
5070 if errs:
5071 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5072 utils.CommaJoin(errs))
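# Editorial sketch of the payload shapes validated above (values invented):
#   OOB_HEALTH       -> list of (item, status) pairs, e.g.
#                       [("PSU0", constants.OOB_STATUS_OK)]
#   OOB_POWER_STATUS -> dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> no payload at all (None)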
5075 class _OsQuery(_QueryBase):
5076 FIELDS = query.OS_FIELDS
5078 def ExpandNames(self, lu):
5079 # Lock all nodes in shared mode
5080 # Temporary removal of locks, should be reverted later
5081 # TODO: reintroduce locks when they are lighter-weight
5082 lu.needed_locks = {}
5083 #self.share_locks[locking.LEVEL_NODE] = 1
5084 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5086 # The following variables interact with _QueryBase._GetNames
5087 if self.names:
5088 self.wanted = self.names
5089 else:
5090 self.wanted = locking.ALL_SET
5092 self.do_locking = self.use_locking
5094 def DeclareLocks(self, lu, level):
5095 pass
5097 @staticmethod
5098 def _DiagnoseByOS(rlist):
5099 """Remaps a per-node return list into an a per-os per-node dictionary
5101 @param rlist: a map with node names as keys and OS objects as values
5104 @return: a dictionary with osnames as keys and as value another
5105 map, with nodes as keys and tuples of (path, status, diagnose,
5106 variants, parameters, api_versions) as values, eg::
5108 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5109 (/srv/..., False, "invalid api")],
5110 "node2": [(/srv/..., True, "", [], [])]}
5115 # we build here the list of nodes that didn't fail the RPC (at RPC
5116 # level), so that nodes with a non-responding node daemon don't
5117 # make all OSes invalid
5118 good_nodes = [node_name for node_name in rlist
5119 if not rlist[node_name].fail_msg]
5120 for node_name, nr in rlist.items():
5121 if nr.fail_msg or not nr.payload:
5122 continue
5123 for (name, path, status, diagnose, variants,
5124 params, api_versions) in nr.payload:
5125 if name not in all_os:
5126 # build a list of nodes for this os containing empty lists
5127 # for each node in node_list
5128 all_os[name] = {}
5129 for nname in good_nodes:
5130 all_os[name][nname] = []
5131 # convert params from [name, help] to (name, help)
5132 params = [tuple(v) for v in params]
5133 all_os[name][node_name].append((path, status, diagnose,
5134 variants, params, api_versions))
5136 return all_os
5137 def _GetQueryData(self, lu):
5138 """Computes the list of nodes and their attributes.
5141 # Locking is not used
5142 assert not (compat.any(lu.glm.is_owned(level)
5143 for level in locking.LEVELS
5144 if level != locking.LEVEL_CLUSTER) or
5145 self.do_locking or self.use_locking)
5147 valid_nodes = [node.name
5148 for node in lu.cfg.GetAllNodesInfo().values()
5149 if not node.offline and node.vm_capable]
5150 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5151 cluster = lu.cfg.GetClusterInfo()
5153 data = {}
5155 for (os_name, os_data) in pol.items():
5156 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5157 hidden=(os_name in cluster.hidden_os),
5158 blacklisted=(os_name in cluster.blacklisted_os))
5160 variants = set()
5161 parameters = set()
5162 api_versions = set()
5164 for idx, osl in enumerate(os_data.values()):
5165 info.valid = bool(info.valid and osl and osl[0][1])
5166 if not info.valid:
5167 break
5169 (node_variants, node_params, node_api) = osl[0][3:6]
5170 if idx == 0:
5171 # First entry
5172 variants.update(node_variants)
5173 parameters.update(node_params)
5174 api_versions.update(node_api)
5175 else:
5176 # Filter out inconsistent values
5177 variants.intersection_update(node_variants)
5178 parameters.intersection_update(node_params)
5179 api_versions.intersection_update(node_api)
5181 info.variants = list(variants)
5182 info.parameters = list(parameters)
5183 info.api_versions = list(api_versions)
5185 data[os_name] = info
5187 # Prepare data in requested order
5188 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5189 if name in data]
5192 class LUOsDiagnose(NoHooksLU):
5193 """Logical unit for OS diagnose/query.
5199 def _BuildFilter(fields, names):
5200 """Builds a filter for querying OSes.
5203 name_filter = qlang.MakeSimpleFilter("name", names)
5205 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5206 # respective field is not requested
5207 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5208 for fname in ["hidden", "blacklisted"]
5209 if fname not in fields]
5210 if "valid" not in fields:
5211 status_filter.append([qlang.OP_TRUE, "valid"])
5213 if status_filter:
5214 status_filter.insert(0, qlang.OP_AND)
5215 else:
5216 status_filter = None
5218 if name_filter and status_filter:
5219 return [qlang.OP_AND, name_filter, status_filter]
5220 elif name_filter:
5221 return name_filter
5222 else:
5223 return status_filter
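# Hedged example (the exact shape of qlang.MakeSimpleFilter's output is
# assumed): for fields=["name"] and names=["debian-etch"], the combined
# filter built above is roughly
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-etch"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]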
5225 def CheckArguments(self):
5226 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5227 self.op.output_fields, False)
5229 def ExpandNames(self):
5230 self.oq.ExpandNames(self)
5232 def Exec(self, feedback_fn):
5233 return self.oq.OldStyleQuery(self)
5236 class _ExtStorageQuery(_QueryBase):
5237 FIELDS = query.EXTSTORAGE_FIELDS
5239 def ExpandNames(self, lu):
5240 # Lock all nodes in shared mode
5241 # Temporary removal of locks, should be reverted later
5242 # TODO: reintroduce locks when they are lighter-weight
5243 lu.needed_locks = {}
5244 #self.share_locks[locking.LEVEL_NODE] = 1
5245 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5247 # The following variables interact with _QueryBase._GetNames
5248 if self.names:
5249 self.wanted = self.names
5250 else:
5251 self.wanted = locking.ALL_SET
5253 self.do_locking = self.use_locking
5255 def DeclareLocks(self, lu, level):
5256 pass
5258 @staticmethod
5259 def _DiagnoseByProvider(rlist):
5260 """Remaps a per-node return list into an a per-provider per-node dictionary
5262 @param rlist: a map with node names as keys and ExtStorage objects as values
5265 @return: a dictionary with extstorage providers as keys and as
5266 value another map, with nodes as keys and tuples of
5267 (path, status, diagnose, parameters) as values, eg::
5269 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5270 "node2": [(/srv/..., False, "missing file")]
5271 "node3": [(/srv/..., True, "", [])]
5276 # we build here the list of nodes that didn't fail the RPC (at RPC
5277 # level), so that nodes with a non-responding node daemon don't
5278 # make all OSes invalid
5279 good_nodes = [node_name for node_name in rlist
5280 if not rlist[node_name].fail_msg]
5281 for node_name, nr in rlist.items():
5282 if nr.fail_msg or not nr.payload:
5283 continue
5284 for (name, path, status, diagnose, params) in nr.payload:
5285 if name not in all_es:
5286 # build a list of nodes for this os containing empty lists
5287 # for each node in node_list
5288 all_es[name] = {}
5289 for nname in good_nodes:
5290 all_es[name][nname] = []
5291 # convert params from [name, help] to (name, help)
5292 params = [tuple(v) for v in params]
5293 all_es[name][node_name].append((path, status, diagnose, params))
5295 return all_es
5296 def _GetQueryData(self, lu):
5297 """Computes the list of nodes and their attributes.
5300 # Locking is not used
5301 assert not (compat.any(lu.glm.is_owned(level)
5302 for level in locking.LEVELS
5303 if level != locking.LEVEL_CLUSTER) or
5304 self.do_locking or self.use_locking)
5306 valid_nodes = [node.name
5307 for node in lu.cfg.GetAllNodesInfo().values()
5308 if not node.offline and node.vm_capable]
5309 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5311 data = {}
5313 nodegroup_list = lu.cfg.GetNodeGroupList()
5315 for (es_name, es_data) in pol.items():
5316 # For every provider compute the nodegroup validity.
5317 # To do this we need to check the validity of each node in es_data
5318 # and then construct the corresponding nodegroup dict:
5319 # { nodegroup1: status
5320 # nodegroup2: status
5321 # }
5322 ndgrp_data = {}
5323 for nodegroup in nodegroup_list:
5324 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5326 nodegroup_nodes = ndgrp.members
5327 nodegroup_name = ndgrp.name
5328 node_statuses = []
5330 for node in nodegroup_nodes:
5331 if node in valid_nodes:
5332 if es_data[node] != []:
5333 node_status = es_data[node][0][1]
5334 node_statuses.append(node_status)
5335 else:
5336 node_statuses.append(False)
5338 if False in node_statuses:
5339 ndgrp_data[nodegroup_name] = False
5340 else:
5341 ndgrp_data[nodegroup_name] = True
5343 # Compute the provider's parameters
5344 parameters = set()
5345 for idx, esl in enumerate(es_data.values()):
5346 valid = bool(esl and esl[0][1])
5347 if not valid:
5348 break
5350 node_params = esl[0][3]
5351 if idx == 0:
5352 # First entry
5353 parameters.update(node_params)
5354 else:
5355 # Filter out inconsistent values
5356 parameters.intersection_update(node_params)
5358 params = list(parameters)
5360 # Now fill all the info for this provider
5361 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5362 nodegroup_status=ndgrp_data,
5363 parameters=params)
5365 data[es_name] = info
5367 # Prepare data in requested order
5368 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5369 if name in data]
5372 class LUExtStorageDiagnose(NoHooksLU):
5373 """Logical unit for ExtStorage diagnose/query.
5378 def CheckArguments(self):
5379 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5380 self.op.output_fields, False)
5382 def ExpandNames(self):
5383 self.eq.ExpandNames(self)
5385 def Exec(self, feedback_fn):
5386 return self.eq.OldStyleQuery(self)
5389 class LUNodeRemove(LogicalUnit):
5390 """Logical unit for removing a node.
5393 HPATH = "node-remove"
5394 HTYPE = constants.HTYPE_NODE
5396 def BuildHooksEnv(self):
5397 """Build hooks env.
5399 """
5400 return {
5401 "OP_TARGET": self.op.node_name,
5402 "NODE_NAME": self.op.node_name,
5403 }
5405 def BuildHooksNodes(self):
5406 """Build hooks nodes.
5408 This doesn't run on the target node in the pre phase as a failed
5409 node would then be impossible to remove.
5411 """
5412 all_nodes = self.cfg.GetNodeList()
5413 try:
5414 all_nodes.remove(self.op.node_name)
5415 except ValueError:
5416 pass
5417 return (all_nodes, all_nodes)
5419 def CheckPrereq(self):
5420 """Check prerequisites.
5423 - the node exists in the configuration
5424 - it does not have primary or secondary instances
5425 - it's not the master
5427 Any errors are signaled by raising errors.OpPrereqError.
5430 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5431 node = self.cfg.GetNodeInfo(self.op.node_name)
5432 assert node is not None
5434 masternode = self.cfg.GetMasterNode()
5435 if node.name == masternode:
5436 raise errors.OpPrereqError("Node is the master node, failover to another"
5437 " node is required", errors.ECODE_INVAL)
5439 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5440 if node.name in instance.all_nodes:
5441 raise errors.OpPrereqError("Instance %s is still running on the node,"
5442 " please remove first" % instance_name,
5444 self.op.node_name = node.name
5447 def Exec(self, feedback_fn):
5448 """Removes the node from the cluster.
5452 logging.info("Stopping the node daemon and removing configs from node %s",
5455 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5457 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5458 "Not owning BGL"
5460 # Promote nodes to master candidate as needed
5461 _AdjustCandidatePool(self, exceptions=[node.name])
5462 self.context.RemoveNode(node.name)
5464 # Run post hooks on the node before it's removed
5465 _RunPostHook(self, node.name)
5467 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5468 msg = result.fail_msg
5470 self.LogWarning("Errors encountered on the remote node while leaving"
5471 " the cluster: %s", msg)
5473 # Remove node from our /etc/hosts
5474 if self.cfg.GetClusterInfo().modify_etc_hosts:
5475 master_node = self.cfg.GetMasterNode()
5476 result = self.rpc.call_etc_hosts_modify(master_node,
5477 constants.ETC_HOSTS_REMOVE,
5478 node.name, None)
5479 result.Raise("Can't update hosts file with new host data")
5480 _RedistributeAncillaryFiles(self)
5483 class _NodeQuery(_QueryBase):
5484 FIELDS = query.NODE_FIELDS
5486 def ExpandNames(self, lu):
5487 lu.needed_locks = {}
5488 lu.share_locks = _ShareAll()
5490 if self.names:
5491 self.wanted = _GetWantedNodes(lu, self.names)
5492 else:
5493 self.wanted = locking.ALL_SET
5495 self.do_locking = (self.use_locking and
5496 query.NQ_LIVE in self.requested_data)
5498 if self.do_locking:
5499 # If any non-static field is requested we need to lock the nodes
5500 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5501 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5503 def DeclareLocks(self, lu, level):
5504 pass
5506 def _GetQueryData(self, lu):
5507 """Computes the list of nodes and their attributes.
5510 all_info = lu.cfg.GetAllNodesInfo()
5512 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5514 # Gather data as requested
5515 if query.NQ_LIVE in self.requested_data:
5516 # filter out non-vm_capable nodes
5517 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5519 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5520 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5521 [lu.cfg.GetHypervisorType()], es_flags)
5522 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5523 for (name, nresult) in node_data.items()
5524 if not nresult.fail_msg and nresult.payload)
5525 else:
5526 live_data = None
5528 if query.NQ_INST in self.requested_data:
5529 node_to_primary = dict([(name, set()) for name in nodenames])
5530 node_to_secondary = dict([(name, set()) for name in nodenames])
5532 inst_data = lu.cfg.GetAllInstancesInfo()
5534 for inst in inst_data.values():
5535 if inst.primary_node in node_to_primary:
5536 node_to_primary[inst.primary_node].add(inst.name)
5537 for secnode in inst.secondary_nodes:
5538 if secnode in node_to_secondary:
5539 node_to_secondary[secnode].add(inst.name)
5540 else:
5541 node_to_primary = None
5542 node_to_secondary = None
5544 if query.NQ_OOB in self.requested_data:
5545 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5546 for name, node in all_info.iteritems())
5547 else:
5548 oob_support = None
5550 if query.NQ_GROUP in self.requested_data:
5551 groups = lu.cfg.GetAllNodeGroupsInfo()
5552 else:
5553 groups = {}
5555 return query.NodeQueryData([all_info[name] for name in nodenames],
5556 live_data, lu.cfg.GetMasterNode(),
5557 node_to_primary, node_to_secondary, groups,
5558 oob_support, lu.cfg.GetClusterInfo())
5561 class LUNodeQuery(NoHooksLU):
5562 """Logical unit for querying nodes.
5565 # pylint: disable=W0142
5568 def CheckArguments(self):
5569 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5570 self.op.output_fields, self.op.use_locking)
5572 def ExpandNames(self):
5573 self.nq.ExpandNames(self)
5575 def DeclareLocks(self, level):
5576 self.nq.DeclareLocks(self, level)
5578 def Exec(self, feedback_fn):
5579 return self.nq.OldStyleQuery(self)
5582 class LUNodeQueryvols(NoHooksLU):
5583 """Logical unit for getting volumes on node(s).
5587 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5588 _FIELDS_STATIC = utils.FieldSet("node")
5590 def CheckArguments(self):
5591 _CheckOutputFields(static=self._FIELDS_STATIC,
5592 dynamic=self._FIELDS_DYNAMIC,
5593 selected=self.op.output_fields)
5595 def ExpandNames(self):
5596 self.share_locks = _ShareAll()
5598 if self.op.nodes:
5599 self.needed_locks = {
5600 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5601 }
5602 else:
5603 self.needed_locks = {
5604 locking.LEVEL_NODE: locking.ALL_SET,
5605 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5606 }
5608 def Exec(self, feedback_fn):
5609 """Computes the list of nodes and their attributes.
5612 nodenames = self.owned_locks(locking.LEVEL_NODE)
5613 volumes = self.rpc.call_node_volumes(nodenames)
5615 ilist = self.cfg.GetAllInstancesInfo()
5616 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5619 for node in nodenames:
5620 nresult = volumes[node]
5623 msg = nresult.fail_msg
5625 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5628 node_vols = sorted(nresult.payload,
5629 key=operator.itemgetter("dev"))
5631 for vol in node_vols:
5633 for field in self.op.output_fields:
5636 elif field == "phys":
5640 elif field == "name":
5642 elif field == "size":
5643 val = int(float(vol["size"]))
5644 elif field == "instance":
5645 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5647 raise errors.ParameterError(field)
5648 node_output.append(str(val))
5650 output.append(node_output)
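# Illustrative addition: the vol2inst lookup above keys logical volumes by
# (node, "vg/name") pairs. A small self-contained sketch of that mapping,
# with made-up data in a simplified (name, node, [lv_names]) shape:
def _ExampleMapDisksToNodes(instances):
  """Map (node, lv_name) to the owning instance name."""
  return dict(((node, lv), name)
              for (name, node, lvs) in instances
              for lv in lvs)

_EXAMPLE_MAP = _ExampleMapDisksToNodes([
  ("inst1", "node1", ["xenvg/disk0", "xenvg/disk1"]),
])
assert _EXAMPLE_MAP[("node1", "xenvg/disk0")] == "inst1"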
5655 class LUNodeQueryStorage(NoHooksLU):
5656 """Logical unit for getting information on storage units on node(s).
5659 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5662 def CheckArguments(self):
5663 _CheckOutputFields(static=self._FIELDS_STATIC,
5664 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5665 selected=self.op.output_fields)
5667 def ExpandNames(self):
5668 self.share_locks = _ShareAll()
5670 if self.op.nodes:
5671 self.needed_locks = {
5672 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5673 }
5674 else:
5675 self.needed_locks = {
5676 locking.LEVEL_NODE: locking.ALL_SET,
5677 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5678 }
5680 def Exec(self, feedback_fn):
5681 """Computes the list of nodes and their attributes.
5684 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5686 # Always get name to sort by
5687 if constants.SF_NAME in self.op.output_fields:
5688 fields = self.op.output_fields[:]
5690 fields = [constants.SF_NAME] + self.op.output_fields
5692 # Never ask for node or type as it's only known to the LU
5693 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5694 while extra in fields:
5695 fields.remove(extra)
5697 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5698 name_idx = field_idx[constants.SF_NAME]
5700 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5701 data = self.rpc.call_storage_list(self.nodes,
5702 self.op.storage_type, st_args,
5703 self.op.name, fields)
5705 result = []
5707 for node in utils.NiceSort(self.nodes):
5708 nresult = data[node]
5709 if nresult.offline:
5710 continue
5712 msg = nresult.fail_msg
5713 if msg:
5714 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5715 continue
5717 rows = dict([(row[name_idx], row) for row in nresult.payload])
5719 for name in utils.NiceSort(rows.keys()):
5720 row = rows[name]
5722 out = []
5724 for field in self.op.output_fields:
5725 if field == constants.SF_NODE:
5726 val = node
5727 elif field == constants.SF_TYPE:
5728 val = self.op.storage_type
5729 elif field in field_idx:
5730 val = row[field_idx[field]]
5731 else:
5732 raise errors.ParameterError(field)
5734 out.append(val)
5736 result.append(out)
5738 return result
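# Illustrative addition: the storage query sends an explicit field list and
# then addresses the returned rows positionally via field_idx. A minimal
# sketch of that positional lookup (the field names here are hypothetical):
def _ExampleFieldIndex(fields):
  return dict((name, idx) for (idx, name) in enumerate(fields))

_FIELDS = ["name", "size", "used"]
_IDX = _ExampleFieldIndex(_FIELDS)
_ROW = ["lv0", 1024, 512]
assert _ROW[_IDX["size"]] == 1024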
5741 class _InstanceQuery(_QueryBase):
5742 FIELDS = query.INSTANCE_FIELDS
5744 def ExpandNames(self, lu):
5745 lu.needed_locks = {}
5746 lu.share_locks = _ShareAll()
5748 if self.names:
5749 self.wanted = _GetWantedInstances(lu, self.names)
5750 else:
5751 self.wanted = locking.ALL_SET
5753 self.do_locking = (self.use_locking and
5754 query.IQ_LIVE in self.requested_data)
5755 if self.do_locking:
5756 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5757 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5758 lu.needed_locks[locking.LEVEL_NODE] = []
5759 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5761 self.do_grouplocks = (self.do_locking and
5762 query.IQ_NODES in self.requested_data)
5764 def DeclareLocks(self, lu, level):
5766 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5767 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5769 # Lock all groups used by instances optimistically; this requires going
5770 # via the node before it's locked, requiring verification later on
5771 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5772 set(group_uuid
5773 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5774 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5775 elif level == locking.LEVEL_NODE:
5776 lu._LockInstancesNodes() # pylint: disable=W0212
5778 @staticmethod
5779 def _CheckGroupLocks(lu):
5780 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5781 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5783 # Check if node groups for locked instances are still correct
5784 for instance_name in owned_instances:
5785 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
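# Illustrative addition: the group locks above are computed from the
# configuration *before* they are acquired (optimistic locking), so after
# acquisition the set must be re-checked and the operation retried on a
# mismatch. A self-contained sketch of that verify step (names made up):
def _ExampleVerifyOptimisticLocks(owned_groups, current_groups):
  """Raise if the config changed between computing and acquiring locks."""
  missing = set(current_groups) - set(owned_groups)
  if missing:
    raise RuntimeError("Groups changed since locks were acquired: %s" %
                       ", ".join(sorted(missing)))

_ExampleVerifyOptimisticLocks(["g1", "g2"], ["g1"])  # ok, owned is a superset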
5787 def _GetQueryData(self, lu):
5788 """Computes the list of instances and their attributes.
5791 if self.do_grouplocks:
5792 self._CheckGroupLocks(lu)
5794 cluster = lu.cfg.GetClusterInfo()
5795 all_info = lu.cfg.GetAllInstancesInfo()
5797 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5799 instance_list = [all_info[name] for name in instance_names]
5800 nodes = frozenset(itertools.chain(*(inst.all_nodes
5801 for inst in instance_list)))
5802 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5803 bad_nodes = []
5804 offline_nodes = []
5805 wrongnode_inst = set()
5807 # Gather data as requested
5808 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5809 live_data = {}
5810 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5811 for name in nodes:
5812 result = node_data[name]
5813 if result.offline:
5814 # offline nodes will be in both lists
5815 assert result.fail_msg
5816 offline_nodes.append(name)
5817 if result.fail_msg:
5818 bad_nodes.append(name)
5819 elif result.payload:
5820 for inst in result.payload:
5821 if inst in all_info:
5822 if all_info[inst].primary_node == name:
5823 live_data.update(result.payload)
5824 else:
5825 wrongnode_inst.add(inst)
5826 else:
5827 # orphan instance; we don't list it here as we don't
5828 # handle this case yet in the output of instance listing
5829 logging.warning("Orphan instance '%s' found on node %s",
5830 inst, name)
5831 # else no instance is alive
5832 else:
5833 live_data = None
5835 if query.IQ_DISKUSAGE in self.requested_data:
5836 gmi = ganeti.masterd.instance
5837 disk_usage = dict((inst.name,
5838 gmi.ComputeDiskSize(inst.disk_template,
5839 [{constants.IDISK_SIZE: disk.size}
5840 for disk in inst.disks]))
5841 for inst in instance_list)
5842 else:
5843 disk_usage = None
5845 if query.IQ_CONSOLE in self.requested_data:
5846 consinfo = {}
5847 for inst in instance_list:
5848 if inst.name in live_data:
5849 # Instance is running
5850 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5851 else:
5852 consinfo[inst.name] = None
5853 assert set(consinfo.keys()) == set(instance_names)
5854 else:
5855 consinfo = None
5857 if query.IQ_NODES in self.requested_data:
5858 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5859 instance_list)))
5860 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5861 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5862 for uuid in set(map(operator.attrgetter("group"),
5863 nodes.values())))
5864 else:
5865 nodes = None
5866 groups = None
5868 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5869 disk_usage, offline_nodes, bad_nodes,
5870 live_data, wrongnode_inst, consinfo,
5871 nodes, groups)
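# Illustrative addition: the loop above sorts each node's RPC answer into
# three buckets: offline nodes, nodes whose call failed, and live instance
# data. A compact sketch of that triage with a hypothetical result type:
import collections

_NodeReply = collections.namedtuple("_NodeReply",
                                    ["offline", "fail_msg", "payload"])

def _ExampleTriage(node_data):
  offline, bad, live = [], [], {}
  for name, res in sorted(node_data.items()):
    if res.offline:
      offline.append(name)  # offline nodes also count as failed below
    if res.fail_msg:
      bad.append(name)
    elif res.payload:
      live.update(res.payload)
  return offline, bad, live

assert _ExampleTriage({
  "n1": _NodeReply(False, None, {"inst1": {"state": "running"}}),
  "n2": _NodeReply(True, "node down", None),
}) == (["n2"], ["n2"], {"inst1": {"state": "running"}})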
5874 class LUQuery(NoHooksLU):
5875 """Query for resources/items of a certain kind.
5878 # pylint: disable=W0142
5881 def CheckArguments(self):
5882 qcls = _GetQueryImplementation(self.op.what)
5884 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5886 def ExpandNames(self):
5887 self.impl.ExpandNames(self)
5889 def DeclareLocks(self, level):
5890 self.impl.DeclareLocks(self, level)
5892 def Exec(self, feedback_fn):
5893 return self.impl.NewStyleQuery(self)
5896 class LUQueryFields(NoHooksLU):
5897 """Query for resources/items of a certain kind.
5900 # pylint: disable=W0142
5903 def CheckArguments(self):
5904 self.qcls = _GetQueryImplementation(self.op.what)
5906 def ExpandNames(self):
5907 self.needed_locks = {}
5909 def Exec(self, feedback_fn):
5910 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5913 class LUNodeModifyStorage(NoHooksLU):
5914 """Logical unit for modifying a storage volume on a node.
5919 def CheckArguments(self):
5920 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5922 storage_type = self.op.storage_type
5924 try:
5925 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5926 except KeyError:
5927 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5928 " modified" % storage_type,
5929 errors.ECODE_INVAL)
5931 diff = set(self.op.changes.keys()) - modifiable
5932 if diff:
5933 raise errors.OpPrereqError("The following fields can not be modified for"
5934 " storage units of type '%s': %r" %
5935 (storage_type, list(diff)),
5936 errors.ECODE_INVAL)
5938 def ExpandNames(self):
5939 self.needed_locks = {
5940 locking.LEVEL_NODE: self.op.node_name,
5941 }
5943 def Exec(self, feedback_fn):
5944 """Computes the list of nodes and their attributes.
5947 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5948 result = self.rpc.call_storage_modify(self.op.node_name,
5949 self.op.storage_type, st_args,
5950 self.op.name, self.op.changes)
5951 result.Raise("Failed to modify storage unit '%s' on %s" %
5952 (self.op.name, self.op.node_name))
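# Illustrative addition: LUNodeModifyStorage validates the requested changes
# against a whitelist of modifiable fields before issuing the RPC. The same
# set-difference pattern in isolation (the field name below is made up):
def _ExampleCheckModifiable(changes, modifiable):
  diff = set(changes) - set(modifiable)
  if diff:
    raise ValueError("Fields %r cannot be modified" % sorted(diff))

_ExampleCheckModifiable({"allocatable": True}, frozenset(["allocatable"]))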
5955 class LUNodeAdd(LogicalUnit):
5956 """Logical unit for adding node to the cluster.
5960 HTYPE = constants.HTYPE_NODE
5961 _NFLAGS = ["master_capable", "vm_capable"]
5963 def CheckArguments(self):
5964 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5965 # validate/normalize the node name
5966 self.hostname = netutils.GetHostname(name=self.op.node_name,
5967 family=self.primary_ip_family)
5968 self.op.node_name = self.hostname.name
5970 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5971 raise errors.OpPrereqError("Cannot readd the master node",
5972 errors.ECODE_STATE)
5974 if self.op.readd and self.op.group:
5975 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5976 " being readded", errors.ECODE_INVAL)
5978 def BuildHooksEnv(self):
5979 """Build hooks env.
5981 This will run on all nodes before, and on all nodes + the new node after.
5983 """
5984 return {
5985 "OP_TARGET": self.op.node_name,
5986 "NODE_NAME": self.op.node_name,
5987 "NODE_PIP": self.op.primary_ip,
5988 "NODE_SIP": self.op.secondary_ip,
5989 "MASTER_CAPABLE": str(self.op.master_capable),
5990 "VM_CAPABLE": str(self.op.vm_capable),
5993 def BuildHooksNodes(self):
5994 """Build hooks nodes.
5997 # Exclude added node
5998 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5999 post_nodes = pre_nodes + [self.op.node_name, ]
6001 return (pre_nodes, post_nodes)
6003 def CheckPrereq(self):
6004 """Check prerequisites.
6007 - the new node is not already in the config
6009 - its parameters (single/dual homed) matches the cluster
6011 Any errors are signaled by raising errors.OpPrereqError.
6015 hostname = self.hostname
6016 node = hostname.name
6017 primary_ip = self.op.primary_ip = hostname.ip
6018 if self.op.secondary_ip is None:
6019 if self.primary_ip_family == netutils.IP6Address.family:
6020 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6021 " IPv4 address must be given as secondary",
6023 self.op.secondary_ip = primary_ip
6025 secondary_ip = self.op.secondary_ip
6026 if not netutils.IP4Address.IsValid(secondary_ip):
6027 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6028 " address" % secondary_ip, errors.ECODE_INVAL)
6030 node_list = cfg.GetNodeList()
6031 if not self.op.readd and node in node_list:
6032 raise errors.OpPrereqError("Node %s is already in the configuration" %
6033 node, errors.ECODE_EXISTS)
6034 elif self.op.readd and node not in node_list:
6035 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6036 errors.ECODE_NOENT)
6038 self.changed_primary_ip = False
6040 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6041 if self.op.readd and node == existing_node_name:
6042 if existing_node.secondary_ip != secondary_ip:
6043 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6044 " address configuration as before",
6046 if existing_node.primary_ip != primary_ip:
6047 self.changed_primary_ip = True
6051 if (existing_node.primary_ip == primary_ip or
6052 existing_node.secondary_ip == primary_ip or
6053 existing_node.primary_ip == secondary_ip or
6054 existing_node.secondary_ip == secondary_ip):
6055 raise errors.OpPrereqError("New node ip address(es) conflict with"
6056 " existing node %s" % existing_node.name,
6057 errors.ECODE_NOTUNIQUE)
6059 # After this 'if' block, None is no longer a valid value for the
6060 # _capable op attributes
6061 if self.op.readd:
6062 old_node = self.cfg.GetNodeInfo(node)
6063 assert old_node is not None, "Can't retrieve locked node %s" % node
6064 for attr in self._NFLAGS:
6065 if getattr(self.op, attr) is None:
6066 setattr(self.op, attr, getattr(old_node, attr))
6067 else:
6068 for attr in self._NFLAGS:
6069 if getattr(self.op, attr) is None:
6070 setattr(self.op, attr, True)
6072 if self.op.readd and not self.op.vm_capable:
6073 pri, sec = cfg.GetNodeInstances(node)
6074 if pri or sec:
6075 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6076 " flag set to false, but it already holds"
6077 " instances" % node,
6078 errors.ECODE_STATE)
6080 # check that the type of the node (single versus dual homed) is the
6081 # same as for the master
6082 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6083 master_singlehomed = myself.secondary_ip == myself.primary_ip
6084 newbie_singlehomed = secondary_ip == primary_ip
6085 if master_singlehomed != newbie_singlehomed:
6086 if master_singlehomed:
6087 raise errors.OpPrereqError("The master has no secondary ip but the"
6088 " new node has one",
6091 raise errors.OpPrereqError("The master has a secondary ip but the"
6092 " new node doesn't have one",
6095 # checks reachability
6096 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6097 raise errors.OpPrereqError("Node not reachable by ping",
6098 errors.ECODE_ENVIRON)
6100 if not newbie_singlehomed:
6101 # check reachability from my secondary ip to newbie's secondary ip
6102 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6103 source=myself.secondary_ip):
6104 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6105 " based ping to node daemon port",
6106 errors.ECODE_ENVIRON)
6108 if self.op.readd:
6109 exceptions = [node]
6110 else:
6111 exceptions = []
6113 if self.op.master_capable:
6114 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6115 else:
6116 self.master_candidate = False
6118 if self.op.readd:
6119 self.new_node = old_node
6120 else:
6121 node_group = cfg.LookupNodeGroup(self.op.group)
6122 self.new_node = objects.Node(name=node,
6123 primary_ip=primary_ip,
6124 secondary_ip=secondary_ip,
6125 master_candidate=self.master_candidate,
6126 offline=False, drained=False,
6127 group=node_group, ndparams={})
6129 if self.op.ndparams:
6130 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6131 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6132 "node", "cluster or group")
6134 if self.op.hv_state:
6135 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6137 if self.op.disk_state:
6138 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6140 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6141 # it a property on the base class.
6142 rpcrunner = rpc.DnsOnlyRunner()
6143 result = rpcrunner.call_version([node])[node]
6144 result.Raise("Can't get version information from node %s" % node)
6145 if constants.PROTOCOL_VERSION == result.payload:
6146 logging.info("Communication to node %s fine, sw version %s match",
6147 node, result.payload)
6148 else:
6149 raise errors.OpPrereqError("Version mismatch master version %s,"
6150 " node version %s" %
6151 (constants.PROTOCOL_VERSION, result.payload),
6152 errors.ECODE_ENVIRON)
6154 vg_name = cfg.GetVGName()
6155 if vg_name is not None:
6156 vparams = {constants.NV_PVLIST: [vg_name]}
6157 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6158 cname = self.cfg.GetClusterName()
6159 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6160 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6161 if errmsgs:
6162 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6163 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6165 def Exec(self, feedback_fn):
6166 """Adds the new node to the cluster.
6169 new_node = self.new_node
6170 node = new_node.name
6172 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6173 "Not owning BGL"
6175 # We are adding a new node, so we assume it is powered
6176 new_node.powered = True
6178 # for re-adds, reset the offline/drained/master-candidate flags;
6179 # we need to reset here, otherwise offline would prevent RPC calls
6180 # later in the procedure; this also means that if the re-add
6181 # fails, we are left with a non-offlined, broken node
6182 if self.op.readd:
6183 new_node.drained = new_node.offline = False # pylint: disable=W0201
6184 self.LogInfo("Readding a node, the offline/drained flags were reset")
6185 # if we demote the node, we do cleanup later in the procedure
6186 new_node.master_candidate = self.master_candidate
6187 if self.changed_primary_ip:
6188 new_node.primary_ip = self.op.primary_ip
6190 # copy the master/vm_capable flags
6191 for attr in self._NFLAGS:
6192 setattr(new_node, attr, getattr(self.op, attr))
6194 # notify the user about any possible mc promotion
6195 if new_node.master_candidate:
6196 self.LogInfo("Node will be a master candidate")
6198 if self.op.ndparams:
6199 new_node.ndparams = self.op.ndparams
6201 new_node.ndparams = {}
6203 if self.op.hv_state:
6204 new_node.hv_state_static = self.new_hv_state
6206 if self.op.disk_state:
6207 new_node.disk_state_static = self.new_disk_state
6209 # Add node to our /etc/hosts, and add key to known_hosts
6210 if self.cfg.GetClusterInfo().modify_etc_hosts:
6211 master_node = self.cfg.GetMasterNode()
6212 result = self.rpc.call_etc_hosts_modify(master_node,
6213 constants.ETC_HOSTS_ADD,
6214 self.hostname.name,
6215 self.hostname.ip)
6216 result.Raise("Can't update hosts file with new host data")
6218 if new_node.secondary_ip != new_node.primary_ip:
6219 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6220 False)
6222 node_verify_list = [self.cfg.GetMasterNode()]
6223 node_verify_param = {
6224 constants.NV_NODELIST: ([node], {}),
6225 # TODO: do a node-net-test as well?
6226 }
6228 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6229 self.cfg.GetClusterName())
6230 for verifier in node_verify_list:
6231 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6232 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6233 if nl_payload:
6234 for failed in nl_payload:
6235 feedback_fn("ssh/hostname verification failed"
6236 " (checking from %s): %s" %
6237 (verifier, nl_payload[failed]))
6238 raise errors.OpExecError("ssh/hostname verification failed")
6240 if self.op.readd:
6241 _RedistributeAncillaryFiles(self)
6242 self.context.ReaddNode(new_node)
6243 # make sure we redistribute the config
6244 self.cfg.Update(new_node, feedback_fn)
6245 # and make sure the new node will not have old files around
6246 if not new_node.master_candidate:
6247 result = self.rpc.call_node_demote_from_mc(new_node.name)
6248 msg = result.fail_msg
6249 if msg:
6250 self.LogWarning("Node failed to demote itself from master"
6251 " candidate status: %s" % msg)
6252 else:
6253 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6254 additional_vm=self.op.vm_capable)
6255 self.context.AddNode(new_node, self.proc.GetECId())
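# Illustrative addition: the node-add flow above treats a non-empty
# NV_NODELIST payload as per-target failure messages and aborts. A sketch
# of that check, with the payload shape simplified to a plain dict:
def _ExampleVerifyPayload(nl_payload):
  if nl_payload:
    lines = ["%s: %s" % (tgt, err) for tgt, err in sorted(nl_payload.items())]
    raise RuntimeError("ssh/hostname verification failed: " +
                       "; ".join(lines))

_ExampleVerifyPayload({})  # an empty payload means verification passed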
6258 class LUNodeSetParams(LogicalUnit):
6259 """Modifies the parameters of a node.
6261 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6262 to the node role (as _ROLE_*)
6263 @cvar _R2F: a dictionary from node role to tuples of flags
6264 @cvar _FLAGS: a list of attribute names corresponding to the flags
6266 """
6267 HPATH = "node-modify"
6268 HTYPE = constants.HTYPE_NODE
6270 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6271 _F2R = {
6272 (True, False, False): _ROLE_CANDIDATE,
6273 (False, True, False): _ROLE_DRAINED,
6274 (False, False, True): _ROLE_OFFLINE,
6275 (False, False, False): _ROLE_REGULAR,
6276 }
6277 _R2F = dict((v, k) for k, v in _F2R.items())
6278 _FLAGS = ["master_candidate", "drained", "offline"]
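  # Illustrative sanity sketch (not part of the original LU): _F2R maps the
  # (master_candidate, drained, offline) flag tuple to a role, and _R2F is
  # its exact inverse, e.g. for the drained role:
  assert _F2R[(False, True, False)] == _ROLE_DRAINED
  assert _R2F[_ROLE_DRAINED] == (False, True, False)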
6280 def CheckArguments(self):
6281 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6282 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6283 self.op.master_capable, self.op.vm_capable,
6284 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6285 self.op.disk_state]
6286 if all_mods.count(None) == len(all_mods):
6287 raise errors.OpPrereqError("Please pass at least one modification",
6288 errors.ECODE_INVAL)
6289 if all_mods.count(True) > 1:
6290 raise errors.OpPrereqError("Can't set the node into more than one"
6291 " state at the same time",
6292 errors.ECODE_INVAL)
6294 # Boolean value that tells us whether we might be demoting from MC
6295 self.might_demote = (self.op.master_candidate is False or
6296 self.op.offline is True or
6297 self.op.drained is True or
6298 self.op.master_capable is False)
6300 if self.op.secondary_ip:
6301 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6302 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6303 " address" % self.op.secondary_ip,
6306 self.lock_all = self.op.auto_promote and self.might_demote
6307 self.lock_instances = self.op.secondary_ip is not None
6309 def _InstanceFilter(self, instance):
6310 """Filter for getting affected instances.
6313 return (instance.disk_template in constants.DTS_INT_MIRROR and
6314 self.op.node_name in instance.all_nodes)
6316 def ExpandNames(self):
6317 if self.lock_all:
6318 self.needed_locks = {
6319 locking.LEVEL_NODE: locking.ALL_SET,
6321 # Block allocations when all nodes are locked
6322 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6323 }
6324 else:
6325 self.needed_locks = {
6326 locking.LEVEL_NODE: self.op.node_name,
6327 }
6329 # Since modifying a node can have severe effects on currently running
6330 # operations the resource lock is at least acquired in shared mode
6331 self.needed_locks[locking.LEVEL_NODE_RES] = \
6332 self.needed_locks[locking.LEVEL_NODE]
6334 # Get all locks except nodes in shared mode; they are not used for anything
6335 # but read-only access
6336 self.share_locks = _ShareAll()
6337 self.share_locks[locking.LEVEL_NODE] = 0
6338 self.share_locks[locking.LEVEL_NODE_RES] = 0
6339 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6341 if self.lock_instances:
6342 self.needed_locks[locking.LEVEL_INSTANCE] = \
6343 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6345 def BuildHooksEnv(self):
6346 """Build hooks env.
6348 This runs on the master node.
6350 """
6351 return {
6352 "OP_TARGET": self.op.node_name,
6353 "MASTER_CANDIDATE": str(self.op.master_candidate),
6354 "OFFLINE": str(self.op.offline),
6355 "DRAINED": str(self.op.drained),
6356 "MASTER_CAPABLE": str(self.op.master_capable),
6357 "VM_CAPABLE": str(self.op.vm_capable),
6360 def BuildHooksNodes(self):
6361 """Build hooks nodes.
6364 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6367 def CheckPrereq(self):
6368 """Check prerequisites.
6370 This only checks the instance list against the existing names.
6372 """
6373 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6375 if self.lock_instances:
6376 affected_instances = \
6377 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6379 # Verify instance locks
6380 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6381 wanted_instances = frozenset(affected_instances.keys())
6382 if wanted_instances - owned_instances:
6383 raise errors.OpPrereqError("Instances affected by changing node %s's"
6384 " secondary IP address have changed since"
6385 " locks were acquired, wanted '%s', have"
6386 " '%s'; retry the operation" %
6388 utils.CommaJoin(wanted_instances),
6389 utils.CommaJoin(owned_instances)),
6392 affected_instances = None
6394 if (self.op.master_candidate is not None or
6395 self.op.drained is not None or
6396 self.op.offline is not None):
6397 # we can't change the master's node flags
6398 if self.op.node_name == self.cfg.GetMasterNode():
6399 raise errors.OpPrereqError("The master role can be changed"
6400 " only via master-failover",
6401 errors.ECODE_INVAL)
6403 if self.op.master_candidate and not node.master_capable:
6404 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6405 " it a master candidate" % node.name,
6406 errors.ECODE_STATE)
6408 if self.op.vm_capable is False:
6409 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6410 if ipri or isec:
6411 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6412 " the vm_capable flag" % node.name,
6413 errors.ECODE_STATE)
6415 if node.master_candidate and self.might_demote and not self.lock_all:
6416 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6417 # check if after removing the current node, we're missing master
6418 # candidates
6419 (mc_remaining, mc_should, _) = \
6420 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6421 if mc_remaining < mc_should:
6422 raise errors.OpPrereqError("Not enough master candidates, please"
6423 " pass auto promote option to allow"
6424 " promotion (--auto-promote or RAPI"
6425 " auto_promote=True)", errors.ECODE_STATE)
6427 self.old_flags = old_flags = (node.master_candidate,
6428 node.drained, node.offline)
6429 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6430 self.old_role = old_role = self._F2R[old_flags]
6432 # Check for ineffective changes
6433 for attr in self._FLAGS:
6434 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6435 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6436 setattr(self.op, attr, None)
6438 # Past this point, any flag change to False means a transition
6439 # away from the respective state, as only real changes are kept
6441 # TODO: We might query the real power state if it supports OOB
6442 if _SupportsOob(self.cfg, node):
6443 if self.op.offline is False and not (node.powered or
6444 self.op.powered is True):
6445 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6446 " offline status can be reset") %
6447 self.op.node_name, errors.ECODE_STATE)
6448 elif self.op.powered is not None:
6449 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6450 " as it does not support out-of-band"
6451 " handling") % self.op.node_name,
6454 # If we're being deofflined/drained, we'll MC ourself if needed
6455 if (self.op.drained is False or self.op.offline is False or
6456 (self.op.master_capable and not node.master_capable)):
6457 if _DecideSelfPromotion(self):
6458 self.op.master_candidate = True
6459 self.LogInfo("Auto-promoting node to master candidate")
6461 # If we're no longer master capable, we'll demote ourselves from MC
6462 if self.op.master_capable is False and node.master_candidate:
6463 self.LogInfo("Demoting from master candidate")
6464 self.op.master_candidate = False
6467 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6468 if self.op.master_candidate:
6469 new_role = self._ROLE_CANDIDATE
6470 elif self.op.drained:
6471 new_role = self._ROLE_DRAINED
6472 elif self.op.offline:
6473 new_role = self._ROLE_OFFLINE
6474 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6475 # False is still in new flags, which means we're un-setting (the
6476 # only) True flag
6477 new_role = self._ROLE_REGULAR
6478 else: # no new flags, nothing, keep old role
6479 new_role = old_role
6481 self.new_role = new_role
6483 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6484 # Trying to transition out of offline status
6485 result = self.rpc.call_version([node.name])[node.name]
6486 if result.fail_msg:
6487 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6488 " to report its version: %s" %
6489 (node.name, result.fail_msg),
6490 errors.ECODE_STATE)
6491 else:
6492 self.LogWarning("Transitioning node from offline to online state"
6493 " without using re-add. Please make sure the node"
6494 " is healthy!")
6496 # When changing the secondary ip, verify if this is a single-homed to
6497 # multi-homed transition or vice versa, and apply the relevant
6498 # cleanup.
6499 if self.op.secondary_ip:
6500 # Ok even without locking, because this can't be changed by any LU
6501 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6502 master_singlehomed = master.secondary_ip == master.primary_ip
6503 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6504 if self.op.force and node.name == master.name:
6505 self.LogWarning("Transitioning from single-homed to multi-homed"
6506 " cluster; all nodes will require a secondary IP"
6509 raise errors.OpPrereqError("Changing the secondary ip on a"
6510 " single-homed cluster requires the"
6511 " --force option to be passed, and the"
6512 " target node to be the master",
6514 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6515 if self.op.force and node.name == master.name:
6516 self.LogWarning("Transitioning from multi-homed to single-homed"
6517 " cluster; secondary IP addresses will have to be"
6520 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6521 " same as the primary IP on a multi-homed"
6522 " cluster, unless the --force option is"
6523 " passed, and the target node is the"
6524 " master", errors.ECODE_INVAL)
6526 assert not (frozenset(affected_instances) -
6527 self.owned_locks(locking.LEVEL_INSTANCE))
6529 if node.offline:
6530 if affected_instances:
6531 msg = ("Cannot change secondary IP address: offline node has"
6532 " instances (%s) configured to use it" %
6533 utils.CommaJoin(affected_instances.keys()))
6534 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6535 else:
6536 # On online nodes, check that no instances are running, and that
6537 # the node has the new ip and we can reach it.
6538 for instance in affected_instances.values():
6539 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6540 msg="cannot change secondary ip")
6542 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6543 if master.name != node.name:
6544 # check reachability from master secondary ip to new secondary ip
6545 if not netutils.TcpPing(self.op.secondary_ip,
6546 constants.DEFAULT_NODED_PORT,
6547 source=master.secondary_ip):
6548 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6549 " based ping to node daemon port",
6550 errors.ECODE_ENVIRON)
6552 if self.op.ndparams:
6553 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6554 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6555 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6556 "node", "cluster or group")
6557 self.new_ndparams = new_ndparams
6559 if self.op.hv_state:
6560 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6561 self.node.hv_state_static)
6563 if self.op.disk_state:
6564 self.new_disk_state = \
6565 _MergeAndVerifyDiskState(self.op.disk_state,
6566 self.node.disk_state_static)
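# Illustrative addition: the role derivation above collapses three boolean
# flags into one new role, preferring (in order) candidate, drained, offline.
# A standalone sketch of the same precedence rule, with string role names
# standing in for the real _ROLE_* constants:
def _ExampleDeriveRole(master_candidate, drained, offline, old_role):
  if master_candidate:
    return "candidate"
  elif drained:
    return "drained"
  elif offline:
    return "offline"
  elif False in (master_candidate, drained, offline):
    return "regular"  # a flag was explicitly unset
  return old_role     # nothing requested, keep the old role

assert _ExampleDeriveRole(None, True, None, "regular") == "drained"
assert _ExampleDeriveRole(None, None, None, "offline") == "offline"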
6568 def Exec(self, feedback_fn):
6569 """Modifies a node.
6571 """
6572 node = self.node
6573 old_role = self.old_role
6574 new_role = self.new_role
6576 result = []
6578 if self.op.ndparams:
6579 node.ndparams = self.new_ndparams
6581 if self.op.powered is not None:
6582 node.powered = self.op.powered
6584 if self.op.hv_state:
6585 node.hv_state_static = self.new_hv_state
6587 if self.op.disk_state:
6588 node.disk_state_static = self.new_disk_state
6590 for attr in ["master_capable", "vm_capable"]:
6591 val = getattr(self.op, attr)
6592 if val is not None:
6593 setattr(node, attr, val)
6594 result.append((attr, str(val)))
6596 if new_role != old_role:
6597 # Tell the node to demote itself, if no longer MC and not offline
6598 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6599 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6600 if msg:
6601 self.LogWarning("Node failed to demote itself: %s", msg)
6603 new_flags = self._R2F[new_role]
6604 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6605 if of != nf:
6606 result.append((desc, str(nf)))
6607 (node.master_candidate, node.drained, node.offline) = new_flags
6609 # we locked all nodes, we adjust the CP before updating this node
6610 if self.lock_all:
6611 _AdjustCandidatePool(self, [node.name])
6613 if self.op.secondary_ip:
6614 node.secondary_ip = self.op.secondary_ip
6615 result.append(("secondary_ip", self.op.secondary_ip))
6617 # this will trigger configuration file update, if needed
6618 self.cfg.Update(node, feedback_fn)
6620 # this will trigger job queue propagation or cleanup if the mc
6621 # flag is modified
6622 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6623 self.context.ReaddNode(node)
6625 return result
6628 class LUNodePowercycle(NoHooksLU):
6629 """Powercycles a node.
6634 def CheckArguments(self):
6635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6636 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6637 raise errors.OpPrereqError("The node is the master and the force"
6638 " parameter was not set",
6641 def ExpandNames(self):
6642 """Locking for PowercycleNode.
6644 This is a last-resort option and shouldn't block on other
6645 jobs. Therefore, we grab no locks.
6647 """
6648 self.needed_locks = {}
6650 def Exec(self, feedback_fn):
6651 """Reboots a node.
6653 """
6654 result = self.rpc.call_node_powercycle(self.op.node_name,
6655 self.cfg.GetHypervisorType())
6656 result.Raise("Failed to schedule the reboot")
6657 return result.payload
6660 class LUClusterQuery(NoHooksLU):
6661 """Query cluster configuration.
6666 def ExpandNames(self):
6667 self.needed_locks = {}
6669 def Exec(self, feedback_fn):
6670 """Return cluster config.
6673 cluster = self.cfg.GetClusterInfo()
6676 # Filter just for enabled hypervisors
6677 for os_name, hv_dict in cluster.os_hvp.items():
6678 os_hvp[os_name] = {}
6679 for hv_name, hv_params in hv_dict.items():
6680 if hv_name in cluster.enabled_hypervisors:
6681 os_hvp[os_name][hv_name] = hv_params
6683 # Convert ip_family to ip_version
6684 primary_ip_version = constants.IP4_VERSION
6685 if cluster.primary_ip_family == netutils.IP6Address.family:
6686 primary_ip_version = constants.IP6_VERSION
6688 result = {
6689 "software_version": constants.RELEASE_VERSION,
6690 "protocol_version": constants.PROTOCOL_VERSION,
6691 "config_version": constants.CONFIG_VERSION,
6692 "os_api_version": max(constants.OS_API_VERSIONS),
6693 "export_version": constants.EXPORT_VERSION,
6694 "architecture": runtime.GetArchInfo(),
6695 "name": cluster.cluster_name,
6696 "master": cluster.master_node,
6697 "default_hypervisor": cluster.primary_hypervisor,
6698 "enabled_hypervisors": cluster.enabled_hypervisors,
6699 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6700 for hypervisor_name in cluster.enabled_hypervisors]),
6701 "os_hvp": os_hvp,
6702 "beparams": cluster.beparams,
6703 "osparams": cluster.osparams,
6704 "ipolicy": cluster.ipolicy,
6705 "nicparams": cluster.nicparams,
6706 "ndparams": cluster.ndparams,
6707 "diskparams": cluster.diskparams,
6708 "candidate_pool_size": cluster.candidate_pool_size,
6709 "master_netdev": cluster.master_netdev,
6710 "master_netmask": cluster.master_netmask,
6711 "use_external_mip_script": cluster.use_external_mip_script,
6712 "volume_group_name": cluster.volume_group_name,
6713 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6714 "file_storage_dir": cluster.file_storage_dir,
6715 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6716 "maintain_node_health": cluster.maintain_node_health,
6717 "ctime": cluster.ctime,
6718 "mtime": cluster.mtime,
6719 "uuid": cluster.uuid,
6720 "tags": list(cluster.GetTags()),
6721 "uid_pool": cluster.uid_pool,
6722 "default_iallocator": cluster.default_iallocator,
6723 "reserved_lvs": cluster.reserved_lvs,
6724 "primary_ip_version": primary_ip_version,
6725 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6726 "hidden_os": cluster.hidden_os,
6727 "blacklisted_os": cluster.blacklisted_os,
6733 class LUClusterConfigQuery(NoHooksLU):
6734 """Return configuration values.
6739 def CheckArguments(self):
6740 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6742 def ExpandNames(self):
6743 self.cq.ExpandNames(self)
6745 def DeclareLocks(self, level):
6746 self.cq.DeclareLocks(self, level)
6748 def Exec(self, feedback_fn):
6749 result = self.cq.OldStyleQuery(self)
6751 assert len(result) == 1
6753 return result[0]
6756 class _ClusterQuery(_QueryBase):
6757 FIELDS = query.CLUSTER_FIELDS
6759 #: Do not sort (there is only one item)
6760 SORT_FIELD = None
6762 def ExpandNames(self, lu):
6763 lu.needed_locks = {}
6765 # The following variables interact with _QueryBase._GetNames
6766 self.wanted = locking.ALL_SET
6767 self.do_locking = self.use_locking
6769 if self.do_locking:
6770 raise errors.OpPrereqError("Can not use locking for cluster queries",
6771 errors.ECODE_INVAL)
6773 def DeclareLocks(self, lu, level):
6774 pass
6776 def _GetQueryData(self, lu):
6777 """Computes the list of nodes and their attributes.
6780 # Locking is not used
6781 assert not (compat.any(lu.glm.is_owned(level)
6782 for level in locking.LEVELS
6783 if level != locking.LEVEL_CLUSTER) or
6784 self.do_locking or self.use_locking)
6786 if query.CQ_CONFIG in self.requested_data:
6787 cluster = lu.cfg.GetClusterInfo()
6788 else:
6789 cluster = NotImplemented
6791 if query.CQ_QUEUE_DRAINED in self.requested_data:
6792 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6793 else:
6794 drain_flag = NotImplemented
6796 if query.CQ_WATCHER_PAUSE in self.requested_data:
6797 master_name = lu.cfg.GetMasterNode()
6799 result = lu.rpc.call_get_watcher_pause(master_name)
6800 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6801 master_name)
6803 watcher_pause = result.payload
6804 else:
6805 watcher_pause = NotImplemented
6807 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6810 class LUInstanceActivateDisks(NoHooksLU):
6811 """Bring up an instance's disks.
6816 def ExpandNames(self):
6817 self._ExpandAndLockInstance()
6818 self.needed_locks[locking.LEVEL_NODE] = []
6819 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6821 def DeclareLocks(self, level):
6822 if level == locking.LEVEL_NODE:
6823 self._LockInstancesNodes()
6825 def CheckPrereq(self):
6826 """Check prerequisites.
6828 This checks that the instance is in the cluster.
6830 """
6831 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6832 assert self.instance is not None, \
6833 "Cannot retrieve locked instance %s" % self.op.instance_name
6834 _CheckNodeOnline(self, self.instance.primary_node)
6836 def Exec(self, feedback_fn):
6837 """Activate the disks.
6840 disks_ok, disks_info = \
6841 _AssembleInstanceDisks(self, self.instance,
6842 ignore_size=self.op.ignore_size)
6844 raise errors.OpExecError("Cannot activate block devices")
6846 if self.op.wait_for_sync:
6847 if not _WaitForSync(self, self.instance):
6848 raise errors.OpExecError("Some disks of the instance are degraded!")
6853 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6854 ignore_size=False):
6855 """Prepare the block devices for an instance.
6857 This sets up the block devices on all nodes.
6859 @type lu: L{LogicalUnit}
6860 @param lu: the logical unit on whose behalf we execute
6861 @type instance: L{objects.Instance}
6862 @param instance: the instance for whose disks we assemble
6863 @type disks: list of L{objects.Disk} or None
6864 @param disks: which disks to assemble (or all, if None)
6865 @type ignore_secondaries: boolean
6866 @param ignore_secondaries: if true, errors on secondary nodes
6867 won't result in an error return from the function
6868 @type ignore_size: boolean
6869 @param ignore_size: if true, the current known size of the disk
6870 will not be used during the disk activation, useful for cases
6871 when the size is wrong
6872 @return: False if the operation failed, otherwise a list of
6873 (host, instance_visible_name, node_visible_name)
6874 with the mapping from node devices to instance devices
6876 """
6877 device_info = []
6878 disks_ok = True
6879 iname = instance.name
6880 disks = _ExpandCheckDisks(instance, disks)
6882 # With the two passes mechanism we try to reduce the window of
6883 # opportunity for the race condition of switching DRBD to primary
6884 # before handshaking occurred, but we do not eliminate it
6886 # The proper fix would be to wait (with some limits) until the
6887 # connection has been made and drbd transitions from WFConnection
6888 # into any other network-connected state (Connected, SyncTarget,
6889 # SyncSource, etc.)
6891 # 1st pass, assemble on all nodes in secondary mode
6892 for idx, inst_disk in enumerate(disks):
6893 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6894 if ignore_size:
6895 node_disk = node_disk.Copy()
6896 node_disk.UnsetSize()
6897 lu.cfg.SetDiskID(node_disk, node)
6898 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6899 False, idx)
6900 msg = result.fail_msg
6901 if msg:
6902 is_offline_secondary = (node in instance.secondary_nodes and
6903 result.offline)
6904 lu.LogWarning("Could not prepare block device %s on node %s"
6905 " (is_primary=False, pass=1): %s",
6906 inst_disk.iv_name, node, msg)
6907 if not (ignore_secondaries or is_offline_secondary):
6908 disks_ok = False
6910 # FIXME: race condition on drbd migration to primary
6912 # 2nd pass, do only the primary node
6913 for idx, inst_disk in enumerate(disks):
6914 dev_path = None
6916 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6917 if node != instance.primary_node:
6918 continue
6919 if ignore_size:
6920 node_disk = node_disk.Copy()
6921 node_disk.UnsetSize()
6922 lu.cfg.SetDiskID(node_disk, node)
6923 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6924 True, idx)
6925 msg = result.fail_msg
6926 if msg:
6927 lu.LogWarning("Could not prepare block device %s on node %s"
6928 " (is_primary=True, pass=2): %s",
6929 inst_disk.iv_name, node, msg)
6930 disks_ok = False
6931 else:
6932 dev_path = result.payload
6934 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6936 # leave the disks configured for the primary node
6937 # this is a workaround that would be fixed better by
6938 # improving the logical/physical id handling
6939 for disk in disks:
6940 lu.cfg.SetDiskID(disk, instance.primary_node)
6942 return disks_ok, device_info
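# Illustrative addition: the two passes above are two filtered walks over
# the same node tree, secondaries first and the primary last, to narrow the
# DRBD handshake race described in the comments. A sketch of the ordering
# trick with plain data (node names are hypothetical):
def _ExampleTwoPassOrder(nodes, primary):
  """Return nodes with every secondary before the primary."""
  return ([n for n in nodes if n != primary] +
          [n for n in nodes if n == primary])

assert _ExampleTwoPassOrder(["n2", "n1", "n3"], "n1") == ["n2", "n3", "n1"]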
6945 def _StartInstanceDisks(lu, instance, force):
6946 """Start the disks of an instance.
6949 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6950 ignore_secondaries=force)
6952 _ShutdownInstanceDisks(lu, instance)
6953 if force is not None and not force:
6955 hint=("If the message above refers to a secondary node,"
6956 " you can retry the operation using '--force'"))
6957 raise errors.OpExecError("Disk consistency error")
6960 class LUInstanceDeactivateDisks(NoHooksLU):
6961 """Shutdown an instance's disks.
6966 def ExpandNames(self):
6967 self._ExpandAndLockInstance()
6968 self.needed_locks[locking.LEVEL_NODE] = []
6969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6971 def DeclareLocks(self, level):
6972 if level == locking.LEVEL_NODE:
6973 self._LockInstancesNodes()
6975 def CheckPrereq(self):
6976 """Check prerequisites.
6978 This checks that the instance is in the cluster.
6980 """
6981 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6982 assert self.instance is not None, \
6983 "Cannot retrieve locked instance %s" % self.op.instance_name
6985 def Exec(self, feedback_fn):
6986 """Deactivate the disks
6989 instance = self.instance
6991 _ShutdownInstanceDisks(self, instance)
6993 _SafeShutdownInstanceDisks(self, instance)
6996 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6997 """Shutdown block devices of an instance.
6999 This function checks if an instance is running, before calling
7000 _ShutdownInstanceDisks.
7003 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7004 _ShutdownInstanceDisks(lu, instance, disks=disks)
7007 def _ExpandCheckDisks(instance, disks):
7008 """Return the instance disks selected by the disks list
7010 @type disks: list of L{objects.Disk} or None
7011 @param disks: selected disks
7012 @rtype: list of L{objects.Disk}
7013 @return: selected instance disks to act on
7015 """
7016 if disks is None:
7017 return instance.disks
7018 else:
7019 if not set(disks).issubset(instance.disks):
7020 raise errors.ProgrammerError("Can only act on disks belonging to the"
7021 " target instance")
7022 return disks
7025 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7026 """Shutdown block devices of an instance.
7028 This does the shutdown on all nodes of the instance.
7030 If the ignore_primary is false, errors on the primary node are
7031 ignored.
7033 """
7034 all_result = True
7035 disks = _ExpandCheckDisks(instance, disks)
7037 for disk in disks:
7038 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7039 lu.cfg.SetDiskID(top_disk, node)
7040 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7041 msg = result.fail_msg
7042 if msg:
7043 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7044 disk.iv_name, node, msg)
7045 if ((node == instance.primary_node and not ignore_primary) or
7046 (node != instance.primary_node and not result.offline)):
7047 all_result = False
7049 return all_result
7051 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7052 """Checks if a node has enough free memory.
7054 This function checks if a given node has the needed amount of free
7055 memory. In case the node has less memory or we cannot get the
7056 information from the node, this function raises an OpPrereqError
7057 exception.
7059 @type lu: C{LogicalUnit}
7060 @param lu: a logical unit from which we get configuration data
7061 @type node: C{str}
7062 @param node: the node to check
7063 @type reason: C{str}
7064 @param reason: string to use in the error message
7065 @type requested: C{int}
7066 @param requested: the amount of memory in MiB to check for
7067 @type hypervisor_name: C{str}
7068 @param hypervisor_name: the hypervisor to ask for memory stats
7069 @rtype: C{int}
7070 @return: node current free memory
7071 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7072 we cannot check the node
7074 """
7075 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7076 nodeinfo[node].Raise("Can't get data from node %s" % node,
7077 prereq=True, ecode=errors.ECODE_ENVIRON)
7078 (_, _, (hv_info, )) = nodeinfo[node].payload
7080 free_mem = hv_info.get("memory_free", None)
7081 if not isinstance(free_mem, int):
7082 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7083 " was '%s'" % (node, free_mem),
7084 errors.ECODE_ENVIRON)
7085 if requested > free_mem:
7086 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7087 " needed %s MiB, available %s MiB" %
7088 (node, reason, requested, free_mem),
7089 errors.ECODE_NORES)
7090 return free_mem
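# Illustrative addition: the free-memory check boils down to a type check
# plus a single comparison once the hypervisor has reported its numbers.
# Sketch with hypothetical values:
def _ExampleHasEnoughMemory(free_mem_mib, requested_mib):
  if not isinstance(free_mem_mib, int):
    raise TypeError("node did not report an integer free-memory value")
  return requested_mib <= free_mem_mib

assert _ExampleHasEnoughMemory(4096, 1024)
assert not _ExampleHasEnoughMemory(512, 1024)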
7093 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7094 """Checks if nodes have enough free disk space in all the VGs.
7096 This function checks if all given nodes have the needed amount of
7097 free disk. In case any node has less disk or we cannot get the
7098 information from the node, this function raises an OpPrereqError
7099 exception.
7101 @type lu: C{LogicalUnit}
7102 @param lu: a logical unit from which we get configuration data
7103 @type nodenames: C{list}
7104 @param nodenames: the list of node names to check
7105 @type req_sizes: C{dict}
7106 @param req_sizes: the hash of vg and corresponding amount of disk in
7107 MiB to check for
7108 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7109 or we cannot check the node
7111 """
7112 for vg, req_size in req_sizes.items():
7113 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7116 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7117 """Checks if nodes have enough free disk space in the specified VG.
7119 This function checks if all given nodes have the needed amount of
7120 free disk. In case any node has less disk or we cannot get the
7121 information from the node, this function raises an OpPrereqError
7122 exception.
7124 @type lu: C{LogicalUnit}
7125 @param lu: a logical unit from which we get configuration data
7126 @type nodenames: C{list}
7127 @param nodenames: the list of node names to check
7128 @type vg: C{str}
7129 @param vg: the volume group to check
7130 @type requested: C{int}
7131 @param requested: the amount of disk in MiB to check for
7132 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7133 or we cannot check the node
7135 """
7136 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7137 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7138 for node in nodenames:
7139 info = nodeinfo[node]
7140 info.Raise("Cannot get current information from node %s" % node,
7141 prereq=True, ecode=errors.ECODE_ENVIRON)
7142 (_, (vg_info, ), _) = info.payload
7143 vg_free = vg_info.get("vg_free", None)
7144 if not isinstance(vg_free, int):
7145 raise errors.OpPrereqError("Can't compute free disk space on node"
7146 " %s for vg %s, result was '%s'" %
7147 (node, vg, vg_free), errors.ECODE_ENVIRON)
7148 if requested > vg_free:
7149 raise errors.OpPrereqError("Not enough disk space on target node %s"
7150 " vg %s: required %d MiB, available %d MiB" %
7151 (node, vg, requested, vg_free),
7152 errors.ECODE_NORES)
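# Illustrative addition: both disk checks above follow the same shape:
# fetch per-node info, validate the reported number, compare against the
# request. Condensed sketch (vg_free is the hypothetical reported value
# in MiB):
def _ExampleCheckVgFree(node, vg, vg_free, requested):
  if not isinstance(vg_free, int):
    raise ValueError("Can't compute free disk space on node %s for vg %s:"
                     " got %r" % (node, vg, vg_free))
  if requested > vg_free:
    raise ValueError("Not enough disk space on node %s vg %s:"
                     " required %d MiB, available %d MiB" %
                     (node, vg, requested, vg_free))

_ExampleCheckVgFree("node1", "xenvg", 20480, 1024)  # passes silently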
7155 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7156 """Checks if nodes have enough physical CPUs
7158 This function checks if all given nodes have the needed number of
7159 physical CPUs. In case any node has less CPUs or we cannot get the
7160 information from the node, this function raises an OpPrereqError
7161 exception.
7163 @type lu: C{LogicalUnit}
7164 @param lu: a logical unit from which we get configuration data
7165 @type nodenames: C{list}
7166 @param nodenames: the list of node names to check
7167 @type requested: C{int}
7168 @param requested: the minimum acceptable number of physical CPUs
7169 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7170 or we cannot check the node
7172 """
7173 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7174 for node in nodenames:
7175 info = nodeinfo[node]
7176 info.Raise("Cannot get current information from node %s" % node,
7177 prereq=True, ecode=errors.ECODE_ENVIRON)
7178 (_, _, (hv_info, )) = info.payload
7179 num_cpus = hv_info.get("cpu_total", None)
7180 if not isinstance(num_cpus, int):
7181 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7182 " on node %s, result was '%s'" %
7183 (node, num_cpus), errors.ECODE_ENVIRON)
7184 if requested > num_cpus:
7185 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7186 "required" % (node, num_cpus, requested),
7190 class LUInstanceStartup(LogicalUnit):
7191 """Starts an instance.
7194 HPATH = "instance-start"
7195 HTYPE = constants.HTYPE_INSTANCE
7198 def CheckArguments(self):
7199 # extra beparams
7200 if self.op.beparams:
7201 # fill the beparams dict
7202 objects.UpgradeBeParams(self.op.beparams)
7203 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7205 def ExpandNames(self):
7206 self._ExpandAndLockInstance()
7207 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7209 def DeclareLocks(self, level):
7210 if level == locking.LEVEL_NODE_RES:
7211 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7213 def BuildHooksEnv(self):
7214 """Build hooks env.
7216 This runs on master, primary and secondary nodes of the instance.
7218 """
7219 env = {
7220 "FORCE": self.op.force,
7221 }
7223 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7225 return env
7227 def BuildHooksNodes(self):
7228 """Build hooks nodes.
7231 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7234 def CheckPrereq(self):
7235 """Check prerequisites.
7237 This checks that the instance is in the cluster.
7239 """
7240 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7241 assert self.instance is not None, \
7242 "Cannot retrieve locked instance %s" % self.op.instance_name
7244 # extra hvparams
7245 if self.op.hvparams:
7246 # check hypervisor parameter syntax (locally)
7247 cluster = self.cfg.GetClusterInfo()
7248 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7249 filled_hvp = cluster.FillHV(instance)
7250 filled_hvp.update(self.op.hvparams)
7251 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7252 hv_type.CheckParameterSyntax(filled_hvp)
7253 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7255 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7257 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7259 if self.primary_offline and self.op.ignore_offline_nodes:
7260 self.LogWarning("Ignoring offline primary node")
7262 if self.op.hvparams or self.op.beparams:
7263 self.LogWarning("Overridden parameters are ignored")
7264 else:
7265 _CheckNodeOnline(self, instance.primary_node)
7267 bep = self.cfg.GetClusterInfo().FillBE(instance)
7268 bep.update(self.op.beparams)
7270 # check bridges existence
7271 _CheckInstanceBridgesExist(self, instance)
7273 remote_info = self.rpc.call_instance_info(instance.primary_node,
7274 instance.name,
7275 instance.hypervisor)
7276 remote_info.Raise("Error checking node %s" % instance.primary_node,
7277 prereq=True, ecode=errors.ECODE_ENVIRON)
7278 if not remote_info.payload: # not running already
7279 _CheckNodeFreeMemory(self, instance.primary_node,
7280 "starting instance %s" % instance.name,
7281 bep[constants.BE_MINMEM], instance.hypervisor)
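# Illustrative addition: overridden hypervisor parameters are validated by
# filling the cluster defaults first and layering the one-off overrides on
# top, as done above with FillHV + update. The layering in isolation, with
# made-up parameter names:
def _ExampleFillParams(defaults, overrides):
  filled = dict(defaults)
  filled.update(overrides)
  return filled

assert _ExampleFillParams({"acpi": True, "boot_order": "cd"},
                          {"boot_order": "n"}) == \
  {"acpi": True, "boot_order": "n"}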
7283 def Exec(self, feedback_fn):
7284 """Start the instance.
7287 instance = self.instance
7288 force = self.op.force
7290 if not self.op.no_remember:
7291 self.cfg.MarkInstanceUp(instance.name)
7293 if self.primary_offline:
7294 assert self.op.ignore_offline_nodes
7295 self.LogInfo("Primary node offline, marked instance as started")
7296 else:
7297 node_current = instance.primary_node
7299 _StartInstanceDisks(self, instance, force)
7301 result = \
7302 self.rpc.call_instance_start(node_current,
7303 (instance, self.op.hvparams,
7304 self.op.beparams),
7305 self.op.startup_paused)
7306 msg = result.fail_msg
7307 if msg:
7308 _ShutdownInstanceDisks(self, instance)
7309 raise errors.OpExecError("Could not start instance: %s" % msg)
7312 class LUInstanceReboot(LogicalUnit):
7313 """Reboot an instance.
7316 HPATH = "instance-reboot"
7317 HTYPE = constants.HTYPE_INSTANCE
7320 def ExpandNames(self):
7321 self._ExpandAndLockInstance()
7323 def BuildHooksEnv(self):
7324 """Build hooks env.
7326 This runs on master, primary and secondary nodes of the instance.
7328 """
7329 env = {
7330 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7331 "REBOOT_TYPE": self.op.reboot_type,
7332 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7333 }
7335 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7337 return env
7339 def BuildHooksNodes(self):
7340 """Build hooks nodes.
7343 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7346 def CheckPrereq(self):
7347 """Check prerequisites.
7349 This checks that the instance is in the cluster.
7351 """
7352 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7353 assert self.instance is not None, \
7354 "Cannot retrieve locked instance %s" % self.op.instance_name
7355 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7356 _CheckNodeOnline(self, instance.primary_node)
7358 # check bridges existence
7359 _CheckInstanceBridgesExist(self, instance)
7361 def Exec(self, feedback_fn):
7362 """Reboot the instance.
7365 instance = self.instance
7366 ignore_secondaries = self.op.ignore_secondaries
7367 reboot_type = self.op.reboot_type
7369 remote_info = self.rpc.call_instance_info(instance.primary_node,
7370 instance.name,
7371 instance.hypervisor)
7372 remote_info.Raise("Error checking node %s" % instance.primary_node)
7373 instance_running = bool(remote_info.payload)
7375 node_current = instance.primary_node
7377 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7378 constants.INSTANCE_REBOOT_HARD]:
7379 for disk in instance.disks:
7380 self.cfg.SetDiskID(disk, node_current)
7381 result = self.rpc.call_instance_reboot(node_current, instance,
7382 reboot_type,
7383 self.op.shutdown_timeout)
7384 result.Raise("Could not reboot instance")
7385 else:
7386 if instance_running:
7387 result = self.rpc.call_instance_shutdown(node_current, instance,
7388 self.op.shutdown_timeout)
7389 result.Raise("Could not shutdown instance for full reboot")
7390 _ShutdownInstanceDisks(self, instance)
7391 else:
7392 self.LogInfo("Instance %s was already stopped, starting now",
7393 instance.name)
7394 _StartInstanceDisks(self, instance, ignore_secondaries)
7395 result = self.rpc.call_instance_start(node_current,
7396 (instance, None, None), False)
7397 msg = result.fail_msg
7398 if msg:
7399 _ShutdownInstanceDisks(self, instance)
7400 raise errors.OpExecError("Could not start instance for"
7401 " full reboot: %s" % msg)
7403 self.cfg.MarkInstanceUp(instance.name)
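# Illustrative addition: the reboot handling above distinguishes soft/hard
# reboots (delegated to the hypervisor on the node) from full reboots
# (shutdown, then start, driven by the master). A sketch of the dispatch,
# with plain strings standing in for the real constants:
def _ExampleRebootPlan(reboot_type, instance_running):
  if instance_running and reboot_type in ("soft", "hard"):
    return ["reboot"]
  plan = ["shutdown", "stop-disks"] if instance_running else []
  return plan + ["start-disks", "start"]

assert _ExampleRebootPlan("soft", True) == ["reboot"]
assert _ExampleRebootPlan("full", False) == ["start-disks", "start"]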
7406 class LUInstanceShutdown(LogicalUnit):
7407 """Shutdown an instance.
7410 HPATH = "instance-stop"
7411 HTYPE = constants.HTYPE_INSTANCE
7414 def ExpandNames(self):
7415 self._ExpandAndLockInstance()
7417 def BuildHooksEnv(self):
7418 """Build hooks env.
7420 This runs on master, primary and secondary nodes of the instance.
7422 """
7423 env = _BuildInstanceHookEnvByObject(self, self.instance)
7424 env["TIMEOUT"] = self.op.timeout
7425 return env
7427 def BuildHooksNodes(self):
7428 """Build hooks nodes.
7431 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7434 def CheckPrereq(self):
7435 """Check prerequisites.
7437 This checks that the instance is in the cluster.
7439 """
7440 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7441 assert self.instance is not None, \
7442 "Cannot retrieve locked instance %s" % self.op.instance_name
7444 if not self.op.force:
7445 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7446 else:
7447 self.LogWarning("Ignoring offline instance check")
7449 self.primary_offline = \
7450 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7452 if self.primary_offline and self.op.ignore_offline_nodes:
7453 self.LogWarning("Ignoring offline primary node")
7454 else:
7455 _CheckNodeOnline(self, self.instance.primary_node)
7457 def Exec(self, feedback_fn):
7458 """Shutdown the instance.
7461 instance = self.instance
7462 node_current = instance.primary_node
7463 timeout = self.op.timeout
7465 # If the instance is offline we shouldn't mark it as down, as that
7466 # resets the offline flag.
7467 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7468 self.cfg.MarkInstanceDown(instance.name)
7470 if self.primary_offline:
7471 assert self.op.ignore_offline_nodes
7472 self.LogInfo("Primary node offline, marked instance as stopped")
7473 else:
7474 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7475 msg = result.fail_msg
7476 if msg:
7477 self.LogWarning("Could not shutdown instance: %s", msg)
7479 _ShutdownInstanceDisks(self, instance)
7482 class LUInstanceReinstall(LogicalUnit):
7483 """Reinstall an instance.
7486 HPATH = "instance-reinstall"
7487 HTYPE = constants.HTYPE_INSTANCE
7490 def ExpandNames(self):
7491 self._ExpandAndLockInstance()
7493 def BuildHooksEnv(self):
7496 This runs on master, primary and secondary nodes of the instance.
7499 return _BuildInstanceHookEnvByObject(self, self.instance)
7501 def BuildHooksNodes(self):
7502 """Build hooks nodes.
7505 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7506 return (nl, nl)
7508 def CheckPrereq(self):
7509 """Check prerequisites.
7511 This checks that the instance is in the cluster and is not running.
7514 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7515 assert instance is not None, \
7516 "Cannot retrieve locked instance %s" % self.op.instance_name
7517 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7518 " offline, cannot reinstall")
7520 if instance.disk_template == constants.DT_DISKLESS:
7521 raise errors.OpPrereqError("Instance '%s' has no disks" %
7522 self.op.instance_name,
7524 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7526 if self.op.os_type is not None:
7528 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7529 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7530 instance_os = self.op.os_type
7531 else:
7532 instance_os = instance.os
7534 nodelist = list(instance.all_nodes)
7536 if self.op.osparams:
7537 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7538 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7539 self.os_inst = i_osdict # the new dict (without defaults)
7540 else:
7541 self.os_inst = {}
7543 self.instance = instance
7545 def Exec(self, feedback_fn):
7546 """Reinstall the instance.
7549 inst = self.instance
7551 if self.op.os_type is not None:
7552 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7553 inst.os = self.op.os_type
7554 # Write to configuration
7555 self.cfg.Update(inst, feedback_fn)
7557 _StartInstanceDisks(self, inst, None)
7559 feedback_fn("Running the instance OS create scripts...")
7560 # FIXME: pass debug option from opcode to backend
7561 result = self.rpc.call_instance_os_add(inst.primary_node,
7562 (inst, self.os_inst), True,
7563 self.op.debug_level)
7564 result.Raise("Could not install OS for instance %s on node %s" %
7565 (inst.name, inst.primary_node))
7566 finally:
7567 _ShutdownInstanceDisks(self, inst)
7570 class LUInstanceRecreateDisks(LogicalUnit):
7571 """Recreate an instance's missing disks.
7574 HPATH = "instance-recreate-disks"
7575 HTYPE = constants.HTYPE_INSTANCE
7578 _MODIFYABLE = compat.UniqueFrozenset([
7579 constants.IDISK_SIZE,
7580 constants.IDISK_MODE,
7583 # New or changed disk parameters may have different semantics
7584 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7585 constants.IDISK_ADOPT,
7587 # TODO: Implement support changing VG while recreating
7589 constants.IDISK_METAVG,
7590 constants.IDISK_PROVIDER,
7593 def _RunAllocator(self):
7594 """Run the allocator based on input opcode.
7597 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7600 # The allocator should actually run in "relocate" mode, but current
7601 # allocators don't support relocating all the nodes of an instance at
7602 # the same time. As a workaround we use "allocate" mode, but this is
7603 # suboptimal for two reasons:
7604 # - The instance name passed to the allocator is present in the list of
7605 # existing instances, so there could be a conflict within the
7606 # internal structures of the allocator. This doesn't happen with the
7607 # current allocators, but it's a liability.
7608 # - The allocator counts the resources used by the instance twice: once
7609 # because the instance exists already, and once because it tries to
7610 # allocate a new instance.
7611 # The allocator could choose some of the nodes on which the instance is
7612 # running, but that's not a problem. If the instance nodes are broken,
7613 # they should already be marked as drained or offline, and hence
7614 # skipped by the allocator. If instance disks have been lost for other
7615 # reasons, then recreating the disks on the same nodes should be fine.
7616 disk_template = self.instance.disk_template
7617 spindle_use = be_full[constants.BE_SPINDLE_USE]
7618 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7619 disk_template=disk_template,
7620 tags=list(self.instance.GetTags()),
7621 os=self.instance.os,
7623 vcpus=be_full[constants.BE_VCPUS],
7624 memory=be_full[constants.BE_MAXMEM],
7625 spindle_use=spindle_use,
7626 disks=[{constants.IDISK_SIZE: d.size,
7627 constants.IDISK_MODE: d.mode}
7628 for d in self.instance.disks],
7629 hypervisor=self.instance.hypervisor,
7630 node_whitelist=None)
7631 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7633 ial.Run(self.op.iallocator)
7635 assert req.RequiredNodes() == len(self.instance.all_nodes)
7638 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7639 " %s" % (self.op.iallocator, ial.info),
7642 self.op.nodes = ial.result
7643 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7644 self.op.instance_name, self.op.iallocator,
7645 utils.CommaJoin(ial.result))
7647 def CheckArguments(self):
7648 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7649 # Normalize and convert deprecated list of disk indices
7650 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
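# For example, a deprecated self.op.disks of [2, 0] becomes
# [(0, {}), (2, {})]: (index, parameter-override) pairs with no changes.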
7652 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7654 raise errors.OpPrereqError("Some disks have been specified more than"
7655 " once: %s" % utils.CommaJoin(duplicates),
7658 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7659 # when neither iallocator nor nodes are specified
7660 if self.op.iallocator or self.op.nodes:
7661 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7663 for (idx, params) in self.op.disks:
7664 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7665 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7667 raise errors.OpPrereqError("Parameters for disk %s try to change"
7668 " unmodifyable parameter(s): %s" %
7669 (idx, utils.CommaJoin(unsupported)),
7672 def ExpandNames(self):
7673 self._ExpandAndLockInstance()
7674 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7676 if self.op.nodes:
7677 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7678 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7679 else:
7680 self.needed_locks[locking.LEVEL_NODE] = []
7681 if self.op.iallocator:
7682 # iallocator will select a new node in the same group
7683 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7684 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7686 self.needed_locks[locking.LEVEL_NODE_RES] = []
7688 def DeclareLocks(self, level):
7689 if level == locking.LEVEL_NODEGROUP:
7690 assert self.op.iallocator is not None
7691 assert not self.op.nodes
7692 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7693 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7694 # Lock the primary group used by the instance optimistically; this
7695 # requires going via the node before it's locked, requiring
7696 # verification later on
7697 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7698 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7700 elif level == locking.LEVEL_NODE:
7701 # If an allocator is used, then we lock all the nodes in the current
7702 # instance group, as we don't know yet which ones will be selected;
7703 # if we replace the nodes without using an allocator, locks are
7704 # already declared in ExpandNames; otherwise, we need to lock all the
7705 # instance nodes for disk re-creation
7706 if self.op.iallocator:
7707 assert not self.op.nodes
7708 assert not self.needed_locks[locking.LEVEL_NODE]
7709 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7711 # Lock member nodes of the group of the primary node
7712 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7713 self.needed_locks[locking.LEVEL_NODE].extend(
7714 self.cfg.GetNodeGroup(group_uuid).members)
7716 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7717 elif not self.op.nodes:
7718 self._LockInstancesNodes(primary_only=False)
7719 elif level == locking.LEVEL_NODE_RES:
7721 self.needed_locks[locking.LEVEL_NODE_RES] = \
7722 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7724 def BuildHooksEnv(self):
7727 This runs on master, primary and secondary nodes of the instance.
7730 return _BuildInstanceHookEnvByObject(self, self.instance)
7732 def BuildHooksNodes(self):
7733 """Build hooks nodes.
7736 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7739 def CheckPrereq(self):
7740 """Check prerequisites.
7742 This checks that the instance is in the cluster and is not running.
7745 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7746 assert instance is not None, \
7747 "Cannot retrieve locked instance %s" % self.op.instance_name
7748 if self.op.nodes:
7749 if len(self.op.nodes) != len(instance.all_nodes):
7750 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7751 " %d replacement nodes were specified" %
7752 (instance.name, len(instance.all_nodes),
7753 len(self.op.nodes)),
7754 errors.ECODE_INVAL)
7755 assert instance.disk_template != constants.DT_DRBD8 or \
7756 len(self.op.nodes) == 2
7757 assert instance.disk_template != constants.DT_PLAIN or \
7758 len(self.op.nodes) == 1
7759 primary_node = self.op.nodes[0]
7760 else:
7761 primary_node = instance.primary_node
7762 if not self.op.iallocator:
7763 _CheckNodeOnline(self, primary_node)
7765 if instance.disk_template == constants.DT_DISKLESS:
7766 raise errors.OpPrereqError("Instance '%s' has no disks" %
7767 self.op.instance_name, errors.ECODE_INVAL)
7769 # Verify if node group locks are still correct
7770 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7771 if owned_groups:
7772 # Node group locks are acquired only for the primary node (and only
7773 # when the allocator is used)
7774 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7775 primary_only=True)
7777 # if we replace nodes *and* the old primary is offline, we don't
7778 # check the instance state
7779 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7780 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7781 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7782 msg="cannot recreate disks")
7784 if self.op.disks:
7785 self.disks = dict(self.op.disks)
7786 else:
7787 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7789 maxidx = max(self.disks.keys())
7790 if maxidx >= len(instance.disks):
7791 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7794 if ((self.op.nodes or self.op.iallocator) and
7795 sorted(self.disks.keys()) != range(len(instance.disks))):
7796 raise errors.OpPrereqError("Can't recreate disks partially and"
7797 " change the nodes at the same time",
7800 self.instance = instance
7802 if self.op.iallocator:
7803 self._RunAllocator()
7804 # Release unneeded node and node resource locks
7805 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7806 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7807 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7809 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7811 def Exec(self, feedback_fn):
7812 """Recreate the disks.
7815 instance = self.instance
7817 assert (self.owned_locks(locking.LEVEL_NODE) ==
7818 self.owned_locks(locking.LEVEL_NODE_RES))
7820 to_skip = []
7821 mods = [] # keeps track of needed changes
7823 for idx, disk in enumerate(instance.disks):
7824 try:
7825 changes = self.disks[idx]
7826 except KeyError:
7827 # Disk should not be recreated
7828 to_skip.append(idx)
7829 continue
7831 # update secondaries for disks, if needed
7832 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7833 # need to update the nodes and minors
7834 assert len(self.op.nodes) == 2
7835 assert len(disk.logical_id) == 6 # otherwise disk internals
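# A DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, shared_secret); only the node
# names and minors are replaced below, port and secret are kept.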
7837 (_, _, old_port, _, _, old_secret) = disk.logical_id
7838 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7839 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7840 new_minors[0], new_minors[1], old_secret)
7841 assert len(disk.logical_id) == len(new_id)
7842 else:
7843 new_id = None
7845 mods.append((idx, new_id, changes))
7847 # now that we have passed all asserts above, we can apply the mods
7848 # in a single run (to avoid partial changes)
7849 for idx, new_id, changes in mods:
7850 disk = instance.disks[idx]
7851 if new_id is not None:
7852 assert disk.dev_type == constants.LD_DRBD8
7853 disk.logical_id = new_id
7854 if changes:
7855 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7856 mode=changes.get(constants.IDISK_MODE, None))
7858 # change primary node, if needed
7859 if self.op.nodes:
7860 instance.primary_node = self.op.nodes[0]
7861 self.LogWarning("Changing the instance's nodes, you will have to"
7862 " remove any disks left on the older nodes manually")
7865 self.cfg.Update(instance, feedback_fn)
7867 # All touched nodes must be locked
7868 mylocks = self.owned_locks(locking.LEVEL_NODE)
7869 assert mylocks.issuperset(frozenset(instance.all_nodes))
7870 _CreateDisks(self, instance, to_skip=to_skip)
7873 class LUInstanceRename(LogicalUnit):
7874 """Rename an instance.
7877 HPATH = "instance-rename"
7878 HTYPE = constants.HTYPE_INSTANCE
7880 def CheckArguments(self):
7884 if self.op.ip_check and not self.op.name_check:
7885 # TODO: make the ip check more flexible and not depend on the name check
7886 raise errors.OpPrereqError("IP address check requires a name check",
7889 def BuildHooksEnv(self):
7892 This runs on master, primary and secondary nodes of the instance.
7895 env = _BuildInstanceHookEnvByObject(self, self.instance)
7896 env["INSTANCE_NEW_NAME"] = self.op.new_name
7899 def BuildHooksNodes(self):
7900 """Build hooks nodes.
7903 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7904 return (nl, nl)
7906 def CheckPrereq(self):
7907 """Check prerequisites.
7909 This checks that the instance is in the cluster and is not running.
7912 self.op.instance_name = _ExpandInstanceName(self.cfg,
7913 self.op.instance_name)
7914 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7915 assert instance is not None
7916 _CheckNodeOnline(self, instance.primary_node)
7917 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7918 msg="cannot rename")
7919 self.instance = instance
7921 new_name = self.op.new_name
7922 if self.op.name_check:
7923 hostname = _CheckHostnameSane(self, new_name)
7924 new_name = self.op.new_name = hostname.name
7925 if (self.op.ip_check and
7926 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7927 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7928 (hostname.ip, new_name),
7929 errors.ECODE_NOTUNIQUE)
7931 instance_list = self.cfg.GetInstanceList()
7932 if new_name in instance_list and new_name != instance.name:
7933 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7934 new_name, errors.ECODE_EXISTS)
7936 def Exec(self, feedback_fn):
7937 """Rename the instance.
7940 inst = self.instance
7941 old_name = inst.name
7943 rename_file_storage = False
7944 if (inst.disk_template in constants.DTS_FILEBASED and
7945 self.op.new_name != inst.name):
7946 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7947 rename_file_storage = True
7949 self.cfg.RenameInstance(inst.name, self.op.new_name)
7950 # Change the instance lock. This is definitely safe while we hold the BGL.
7951 # Otherwise the new lock would have to be added in acquired mode.
7953 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7954 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7955 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7957 # re-read the instance from the configuration after rename
7958 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7960 if rename_file_storage:
7961 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7962 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7963 old_file_storage_dir,
7964 new_file_storage_dir)
7965 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7966 " (but the instance has been renamed in Ganeti)" %
7967 (inst.primary_node, old_file_storage_dir,
7968 new_file_storage_dir))
7970 _StartInstanceDisks(self, inst, None)
7971 # update info on disks
7972 info = _GetInstanceInfoText(inst)
7973 for (idx, disk) in enumerate(inst.disks):
7974 for node in inst.all_nodes:
7975 self.cfg.SetDiskID(disk, node)
7976 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7977 if result.fail_msg:
7978 self.LogWarning("Error setting info on node %s for disk %s: %s",
7979 node, idx, result.fail_msg)
7980 try:
7981 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7982 old_name, self.op.debug_level)
7983 msg = result.fail_msg
7984 if msg:
7985 msg = ("Could not run OS rename script for instance %s on node %s"
7986 " (but the instance has been renamed in Ganeti): %s" %
7987 (inst.name, inst.primary_node, msg))
7988 self.LogWarning(msg)
7989 finally:
7990 _ShutdownInstanceDisks(self, inst)
7992 return inst.name
7995 class LUInstanceRemove(LogicalUnit):
7996 """Remove an instance.
7999 HPATH = "instance-remove"
8000 HTYPE = constants.HTYPE_INSTANCE
8003 def ExpandNames(self):
8004 self._ExpandAndLockInstance()
8005 self.needed_locks[locking.LEVEL_NODE] = []
8006 self.needed_locks[locking.LEVEL_NODE_RES] = []
8007 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8009 def DeclareLocks(self, level):
8010 if level == locking.LEVEL_NODE:
8011 self._LockInstancesNodes()
8012 elif level == locking.LEVEL_NODE_RES:
8014 self.needed_locks[locking.LEVEL_NODE_RES] = \
8015 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8017 def BuildHooksEnv(self):
8020 This runs on master, primary and secondary nodes of the instance.
8023 env = _BuildInstanceHookEnvByObject(self, self.instance)
8024 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8027 def BuildHooksNodes(self):
8028 """Build hooks nodes.
8031 nl = [self.cfg.GetMasterNode()]
8032 nl_post = list(self.instance.all_nodes) + nl
8033 return (nl, nl_post)
8035 def CheckPrereq(self):
8036 """Check prerequisites.
8038 This checks that the instance is in the cluster.
8041 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8042 assert self.instance is not None, \
8043 "Cannot retrieve locked instance %s" % self.op.instance_name
8045 def Exec(self, feedback_fn):
8046 """Remove the instance.
8049 instance = self.instance
8050 logging.info("Shutting down instance %s on node %s",
8051 instance.name, instance.primary_node)
8053 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8054 self.op.shutdown_timeout)
8055 msg = result.fail_msg
8056 if msg:
8057 if self.op.ignore_failures:
8058 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8059 else:
8060 raise errors.OpExecError("Could not shutdown instance %s on"
8061 " node %s: %s" %
8062 (instance.name, instance.primary_node, msg))
8064 assert (self.owned_locks(locking.LEVEL_NODE) ==
8065 self.owned_locks(locking.LEVEL_NODE_RES))
8066 assert not (set(instance.all_nodes) -
8067 self.owned_locks(locking.LEVEL_NODE)), \
8068 "Not owning correct locks"
8070 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8073 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8074 """Utility function to remove an instance.
8077 logging.info("Removing block devices for instance %s", instance.name)
8079 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8080 if not ignore_failures:
8081 raise errors.OpExecError("Can't remove instance's disks")
8082 feedback_fn("Warning: can't remove instance's disks")
8084 logging.info("Removing instance %s out of cluster config", instance.name)
8086 lu.cfg.RemoveInstance(instance.name)
8088 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8089 "Instance lock removal conflict"
8091 # Remove lock for the instance
8092 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8095 class LUInstanceQuery(NoHooksLU):
8096 """Logical unit for querying instances.
8099 # pylint: disable=W0142
8102 def CheckArguments(self):
8103 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8104 self.op.output_fields, self.op.use_locking)
8106 def ExpandNames(self):
8107 self.iq.ExpandNames(self)
8109 def DeclareLocks(self, level):
8110 self.iq.DeclareLocks(self, level)
8112 def Exec(self, feedback_fn):
8113 return self.iq.OldStyleQuery(self)
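# A note on the query path: the filter built in CheckArguments is an OR of
# per-name equality tests on the "name" field; when no names are given, no
# filter is applied and all instances are returned.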
8116 def _ExpandNamesForMigration(lu):
8117 """Expands names for use with L{TLMigrateInstance}.
8119 @type lu: L{LogicalUnit}
8122 if lu.op.target_node is not None:
8123 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8125 lu.needed_locks[locking.LEVEL_NODE] = []
8126 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8128 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8129 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8131 # The node allocation lock is actually only needed for replicated instances
8132 # (e.g. DRBD8) and if an iallocator is used.
8133 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8136 def _DeclareLocksForMigration(lu, level):
8137 """Declares locks for L{TLMigrateInstance}.
8139 @type lu: L{LogicalUnit}
8140 @param level: Lock level
8143 if level == locking.LEVEL_NODE_ALLOC:
8144 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8146 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8148 # Node locks are already declared here rather than at LEVEL_NODE as we need
8149 # the instance object anyway to declare the node allocation lock.
8150 if instance.disk_template in constants.DTS_EXT_MIRROR:
8151 if lu.op.target_node is None:
8152 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8153 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8154 else:
8155 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8156 lu.op.target_node]
8157 del lu.recalculate_locks[locking.LEVEL_NODE]
8158 else:
8159 lu._LockInstancesNodes() # pylint: disable=W0212
8161 elif level == locking.LEVEL_NODE:
8162 # Node locks are declared together with the node allocation lock
8163 assert (lu.needed_locks[locking.LEVEL_NODE] or
8164 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8166 elif level == locking.LEVEL_NODE_RES:
8168 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8169 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8172 class LUInstanceFailover(LogicalUnit):
8173 """Failover an instance.
8176 HPATH = "instance-failover"
8177 HTYPE = constants.HTYPE_INSTANCE
8180 def CheckArguments(self):
8181 """Check the arguments.
8184 self.iallocator = getattr(self.op, "iallocator", None)
8185 self.target_node = getattr(self.op, "target_node", None)
8187 def ExpandNames(self):
8188 self._ExpandAndLockInstance()
8189 _ExpandNamesForMigration(self)
8191 self._migrater = \
8192 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8193 self.op.ignore_consistency, True,
8194 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8196 self.tasklets = [self._migrater]
8198 def DeclareLocks(self, level):
8199 _DeclareLocksForMigration(self, level)
8201 def BuildHooksEnv(self):
8204 This runs on master, primary and secondary nodes of the instance.
8207 instance = self._migrater.instance
8208 source_node = instance.primary_node
8209 target_node = self.op.target_node
8211 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8212 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8213 "OLD_PRIMARY": source_node,
8214 "NEW_PRIMARY": target_node,
8217 if instance.disk_template in constants.DTS_INT_MIRROR:
8218 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8219 env["NEW_SECONDARY"] = source_node
8220 else:
8221 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
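# For internally mirrored (DRBD) templates the failover swaps roles: the old
# secondary becomes the new primary and the old primary is expected to come
# back as the new secondary.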
8223 env.update(_BuildInstanceHookEnvByObject(self, instance))
8225 return env
8227 def BuildHooksNodes(self):
8228 """Build hooks nodes.
8231 instance = self._migrater.instance
8232 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8233 return (nl, nl + [instance.primary_node])
8236 class LUInstanceMigrate(LogicalUnit):
8237 """Migrate an instance.
8239 This is migration without shutting down, compared to the failover,
8240 which is done with shutdown.
8243 HPATH = "instance-migrate"
8244 HTYPE = constants.HTYPE_INSTANCE
8247 def ExpandNames(self):
8248 self._ExpandAndLockInstance()
8249 _ExpandNamesForMigration(self)
8251 self._migrater = \
8252 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8253 False, self.op.allow_failover, False,
8254 self.op.allow_runtime_changes,
8255 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8256 self.op.ignore_ipolicy)
8258 self.tasklets = [self._migrater]
8260 def DeclareLocks(self, level):
8261 _DeclareLocksForMigration(self, level)
8263 def BuildHooksEnv(self):
8266 This runs on master, primary and secondary nodes of the instance.
8269 instance = self._migrater.instance
8270 source_node = instance.primary_node
8271 target_node = self.op.target_node
8272 env = _BuildInstanceHookEnvByObject(self, instance)
8274 "MIGRATE_LIVE": self._migrater.live,
8275 "MIGRATE_CLEANUP": self.op.cleanup,
8276 "OLD_PRIMARY": source_node,
8277 "NEW_PRIMARY": target_node,
8278 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8281 if instance.disk_template in constants.DTS_INT_MIRROR:
8282 env["OLD_SECONDARY"] = target_node
8283 env["NEW_SECONDARY"] = source_node
8284 else:
8285 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8287 return env
8289 def BuildHooksNodes(self):
8290 """Build hooks nodes.
8293 instance = self._migrater.instance
8294 snodes = list(instance.secondary_nodes)
8295 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8296 return (nl, nl)
8299 class LUInstanceMove(LogicalUnit):
8300 """Move an instance by data-copying.
8303 HPATH = "instance-move"
8304 HTYPE = constants.HTYPE_INSTANCE
8307 def ExpandNames(self):
8308 self._ExpandAndLockInstance()
8309 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8310 self.op.target_node = target_node
8311 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8312 self.needed_locks[locking.LEVEL_NODE_RES] = []
8313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8315 def DeclareLocks(self, level):
8316 if level == locking.LEVEL_NODE:
8317 self._LockInstancesNodes(primary_only=True)
8318 elif level == locking.LEVEL_NODE_RES:
8320 self.needed_locks[locking.LEVEL_NODE_RES] = \
8321 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8323 def BuildHooksEnv(self):
8326 This runs on master, primary and secondary nodes of the instance.
8330 "TARGET_NODE": self.op.target_node,
8331 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8333 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8336 def BuildHooksNodes(self):
8337 """Build hooks nodes.
8340 nl = [
8341 self.cfg.GetMasterNode(),
8342 self.instance.primary_node,
8343 self.op.target_node,
8344 ]
8345 return (nl, nl)
8347 def CheckPrereq(self):
8348 """Check prerequisites.
8350 This checks that the instance is in the cluster.
8353 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8354 assert self.instance is not None, \
8355 "Cannot retrieve locked instance %s" % self.op.instance_name
8357 node = self.cfg.GetNodeInfo(self.op.target_node)
8358 assert node is not None, \
8359 "Cannot retrieve locked node %s" % self.op.target_node
8361 self.target_node = target_node = node.name
8363 if target_node == instance.primary_node:
8364 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8365 (instance.name, target_node),
8366 errors.ECODE_INVAL)
8368 bep = self.cfg.GetClusterInfo().FillBE(instance)
8370 for idx, dsk in enumerate(instance.disks):
8371 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8372 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8373 " cannot copy" % idx, errors.ECODE_STATE)
8375 _CheckNodeOnline(self, target_node)
8376 _CheckNodeNotDrained(self, target_node)
8377 _CheckNodeVmCapable(self, target_node)
8378 cluster = self.cfg.GetClusterInfo()
8379 group_info = self.cfg.GetNodeGroup(node.group)
8380 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8381 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8382 ignore=self.op.ignore_ipolicy)
8384 if instance.admin_state == constants.ADMINST_UP:
8385 # check memory requirements on the secondary node
8386 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8387 instance.name, bep[constants.BE_MAXMEM],
8388 instance.hypervisor)
8390 self.LogInfo("Not checking memory on the secondary node as"
8391 " instance will not be started")
8393 # check bridge existence
8394 _CheckInstanceBridgesExist(self, instance, node=target_node)
8396 def Exec(self, feedback_fn):
8397 """Move an instance.
8399 The move is done by shutting it down on its present node, copying
8400 the data over (slow) and starting it on the new node.
8403 instance = self.instance
8405 source_node = instance.primary_node
8406 target_node = self.target_node
8408 self.LogInfo("Shutting down instance %s on source node %s",
8409 instance.name, source_node)
8411 assert (self.owned_locks(locking.LEVEL_NODE) ==
8412 self.owned_locks(locking.LEVEL_NODE_RES))
8414 result = self.rpc.call_instance_shutdown(source_node, instance,
8415 self.op.shutdown_timeout)
8416 msg = result.fail_msg
8417 if msg:
8418 if self.op.ignore_consistency:
8419 self.LogWarning("Could not shutdown instance %s on node %s."
8420 " Proceeding anyway. Please make sure node"
8421 " %s is down. Error details: %s",
8422 instance.name, source_node, source_node, msg)
8424 raise errors.OpExecError("Could not shutdown instance %s on"
8426 (instance.name, source_node, msg))
8428 # create the target disks
8429 try:
8430 _CreateDisks(self, instance, target_node=target_node)
8431 except errors.OpExecError:
8432 self.LogWarning("Device creation failed, reverting...")
8433 try:
8434 _RemoveDisks(self, instance, target_node=target_node)
8435 finally:
8436 self.cfg.ReleaseDRBDMinors(instance.name)
8437 raise
8439 cluster_name = self.cfg.GetClusterInfo().cluster_name
8441 errs = []
8442 # activate, get path, copy the data over
8443 for idx, disk in enumerate(instance.disks):
8444 self.LogInfo("Copying data for disk %d", idx)
8445 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8446 instance.name, True, idx)
8448 self.LogWarning("Can't assemble newly created disk %d: %s",
8449 idx, result.fail_msg)
8450 errs.append(result.fail_msg)
8451 break
8452 dev_path = result.payload
8453 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8454 target_node, dev_path,
8457 self.LogWarning("Can't copy data over for disk %d: %s",
8458 idx, result.fail_msg)
8459 errs.append(result.fail_msg)
8460 break
8462 if errs:
8463 self.LogWarning("Some disks failed to copy, aborting")
8464 try:
8465 _RemoveDisks(self, instance, target_node=target_node)
8466 finally:
8467 self.cfg.ReleaseDRBDMinors(instance.name)
8468 raise errors.OpExecError("Errors during disk copy: %s" %
8469 (",".join(errs),))
8471 instance.primary_node = target_node
8472 self.cfg.Update(instance, feedback_fn)
8474 self.LogInfo("Removing the disks on the original node")
8475 _RemoveDisks(self, instance, target_node=source_node)
8477 # Only start the instance if it's marked as up
8478 if instance.admin_state == constants.ADMINST_UP:
8479 self.LogInfo("Starting instance %s on node %s",
8480 instance.name, target_node)
8482 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8483 ignore_secondaries=True)
8484 if not disks_ok:
8485 _ShutdownInstanceDisks(self, instance)
8486 raise errors.OpExecError("Can't activate the instance's disks")
8488 result = self.rpc.call_instance_start(target_node,
8489 (instance, None, None), False)
8490 msg = result.fail_msg
8491 if msg:
8492 _ShutdownInstanceDisks(self, instance)
8493 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8494 (instance.name, target_node, msg))
8497 class LUNodeMigrate(LogicalUnit):
8498 """Migrate all instances from a node.
8501 HPATH = "node-migrate"
8502 HTYPE = constants.HTYPE_NODE
8505 def CheckArguments(self):
8506 pass
8508 def ExpandNames(self):
8509 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8511 self.share_locks = _ShareAll()
8512 self.needed_locks = {
8513 locking.LEVEL_NODE: [self.op.node_name],
8516 def BuildHooksEnv(self):
8519 This runs on the master, the primary and all the secondaries.
8523 "NODE_NAME": self.op.node_name,
8524 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8527 def BuildHooksNodes(self):
8528 """Build hooks nodes.
8531 nl = [self.cfg.GetMasterNode()]
8532 return (nl, nl)
8534 def CheckPrereq(self):
8535 pass
8537 def Exec(self, feedback_fn):
8538 # Prepare jobs for migration instances
8539 allow_runtime_changes = self.op.allow_runtime_changes
8540 jobs = [
8541 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8542 mode=self.op.mode,
8543 live=self.op.live,
8544 iallocator=self.op.iallocator,
8545 target_node=self.op.target_node,
8546 allow_runtime_changes=allow_runtime_changes,
8547 ignore_ipolicy=self.op.ignore_ipolicy)]
8548 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8550 # TODO: Run iallocator in this opcode and pass correct placement options to
8551 # OpInstanceMigrate. Since other jobs can modify the cluster between
8552 # running the iallocator and the actual migration, a good consistency model
8553 # will have to be found.
8555 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8556 frozenset([self.op.node_name]))
8558 return ResultWithJobs(jobs)
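# Each inner list is one single-opcode job: one migration job per primary
# instance, so the job queue can schedule them independently, constrained
# only by the locks the individual jobs acquire.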
8561 class TLMigrateInstance(Tasklet):
8562 """Tasklet class for instance migration.
8565 @ivar live: whether the migration will be done live or non-live;
8566 this variable is initialized only after CheckPrereq has run
8567 @type cleanup: boolean
8568 @ivar cleanup: Whether we clean up after a failed migration
8569 @type iallocator: string
8570 @ivar iallocator: The iallocator used to determine target_node
8571 @type target_node: string
8572 @ivar target_node: If given, the target_node to reallocate the instance to
8573 @type failover: boolean
8574 @ivar failover: Whether operation results in failover or migration
8575 @type fallback: boolean
8576 @ivar fallback: Whether fallback to failover is allowed if migration is not
8577 possible
8578 @type ignore_consistency: boolean
8579 @ivar ignore_consistency: Whether we should ignore consistency between source
8580 and target node
8581 @type shutdown_timeout: int
8582 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8583 @type ignore_ipolicy: bool
8584 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8589 _MIGRATION_POLL_INTERVAL = 1 # seconds
8590 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8592 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8593 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8595 """Initializes this class.
8598 Tasklet.__init__(self, lu)
8601 self.instance_name = instance_name
8602 self.cleanup = cleanup
8603 self.live = False # will be overridden later
8604 self.failover = failover
8605 self.fallback = fallback
8606 self.ignore_consistency = ignore_consistency
8607 self.shutdown_timeout = shutdown_timeout
8608 self.ignore_ipolicy = ignore_ipolicy
8609 self.allow_runtime_changes = allow_runtime_changes
8611 def CheckPrereq(self):
8612 """Check prerequisites.
8614 This checks that the instance is in the cluster.
8617 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8618 instance = self.cfg.GetInstanceInfo(instance_name)
8619 assert instance is not None
8620 self.instance = instance
8621 cluster = self.cfg.GetClusterInfo()
8623 if (not self.cleanup and
8624 not instance.admin_state == constants.ADMINST_UP and
8625 not self.failover and self.fallback):
8626 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8627 " switching to failover")
8628 self.failover = True
8630 if instance.disk_template not in constants.DTS_MIRRORED:
8631 if self.failover:
8632 text = "failovers"
8633 else:
8634 text = "migrations"
8635 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8636 " %s" % (instance.disk_template, text),
8637 errors.ECODE_STATE)
8639 if instance.disk_template in constants.DTS_EXT_MIRROR:
8640 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8642 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8644 if self.lu.op.iallocator:
8645 self._RunAllocator()
8646 else:
8647 # We set self.target_node as it is required by
8648 # BuildHooksEnv
8649 self.target_node = self.lu.op.target_node
8651 # Check that the target node is correct in terms of instance policy
8652 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8653 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8654 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8655 group_info)
8656 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8657 ignore=self.ignore_ipolicy)
8659 # self.target_node is already populated, either directly or by the
8661 target_node = self.target_node
8662 if self.target_node == instance.primary_node:
8663 raise errors.OpPrereqError("Cannot migrate instance %s"
8664 " to its primary (%s)" %
8665 (instance.name, instance.primary_node),
8666 errors.ECODE_INVAL)
8668 if len(self.lu.tasklets) == 1:
8669 # It is safe to release locks only when we're the only tasklet
8671 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8672 keep=[instance.primary_node, self.target_node])
8673 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8675 else:
8676 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8678 secondary_nodes = instance.secondary_nodes
8679 if not secondary_nodes:
8680 raise errors.ConfigurationError("No secondary node but using"
8681 " %s disk template" %
8682 instance.disk_template)
8683 target_node = secondary_nodes[0]
8684 if self.lu.op.iallocator or (self.lu.op.target_node and
8685 self.lu.op.target_node != target_node):
8687 text = "failed over"
8690 raise errors.OpPrereqError("Instances with disk template %s cannot"
8691 " be %s to arbitrary nodes"
8692 " (neither an iallocator nor a target"
8693 " node can be passed)" %
8694 (instance.disk_template, text),
8696 nodeinfo = self.cfg.GetNodeInfo(target_node)
8697 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8698 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8699 group_info)
8700 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8701 ignore=self.ignore_ipolicy)
8703 i_be = cluster.FillBE(instance)
8705 # check memory requirements on the secondary node
8706 if (not self.cleanup and
8707 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8708 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8709 "migrating instance %s" %
8710 instance.name,
8711 i_be[constants.BE_MINMEM],
8712 instance.hypervisor)
8714 self.lu.LogInfo("Not checking memory on the secondary node as"
8715 " instance will not be started")
8717 # check if failover must be forced instead of migration
8718 if (not self.cleanup and not self.failover and
8719 i_be[constants.BE_ALWAYS_FAILOVER]):
8720 self.lu.LogInfo("Instance configured to always failover; fallback"
8722 self.failover = True
8724 # check bridge existence
8725 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8727 if not self.cleanup:
8728 _CheckNodeNotDrained(self.lu, target_node)
8729 if not self.failover:
8730 result = self.rpc.call_instance_migratable(instance.primary_node,
8731 instance)
8732 if result.fail_msg and self.fallback:
8733 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8734 " failover")
8735 self.failover = True
8736 else:
8737 result.Raise("Can't migrate, please use failover",
8738 prereq=True, ecode=errors.ECODE_STATE)
8740 assert not (self.failover and self.cleanup)
8742 if not self.failover:
8743 if self.lu.op.live is not None and self.lu.op.mode is not None:
8744 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8745 " parameters are accepted",
8747 if self.lu.op.live is not None:
8748 if self.lu.op.live:
8749 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8750 else:
8751 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8752 # reset the 'live' parameter to None so that repeated
8753 # invocations of CheckPrereq do not raise an exception
8754 self.lu.op.live = None
8755 elif self.lu.op.mode is None:
8756 # read the default value from the hypervisor
8757 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8758 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8760 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8761 else:
8762 # Failover is never live
8763 self.live = False
8765 if not (self.failover or self.cleanup):
8766 remote_info = self.rpc.call_instance_info(instance.primary_node,
8767 instance.name,
8768 instance.hypervisor)
8769 remote_info.Raise("Error checking instance on node %s" %
8770 instance.primary_node)
8771 instance_running = bool(remote_info.payload)
8772 if instance_running:
8773 self.current_mem = int(remote_info.payload["memory"])
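# current_mem is the memory actually in use by the running instance; it is
# compared against tgt_free_mem in _ExecMigration and, if runtime changes
# are allowed, the instance is ballooned down to fit first.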
8775 def _RunAllocator(self):
8776 """Run the allocator based on input opcode.
8779 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8781 # FIXME: add a self.ignore_ipolicy option
8782 req = iallocator.IAReqRelocate(name=self.instance_name,
8783 relocate_from=[self.instance.primary_node])
8784 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8786 ial.Run(self.lu.op.iallocator)
8789 raise errors.OpPrereqError("Can't compute nodes using"
8790 " iallocator '%s': %s" %
8791 (self.lu.op.iallocator, ial.info),
8792 errors.ECODE_NORES)
8793 self.target_node = ial.result[0]
8794 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8795 self.instance_name, self.lu.op.iallocator,
8796 utils.CommaJoin(ial.result))
8798 def _WaitUntilSync(self):
8799 """Poll with custom rpc for disk sync.
8801 This uses our own step-based rpc call.
8804 self.feedback_fn("* wait until resync is done")
8805 all_done = False
8806 while not all_done:
8807 all_done = True
8808 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8809 self.nodes_ip,
8810 (self.instance.disks,
8811 self.instance))
8812 min_percent = 100
8813 for node, nres in result.items():
8814 nres.Raise("Cannot resync disks on node %s" % node)
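# Each payload is a (done, sync_percent) pair per node; the percentage may
# be None for devices that have already finished syncing.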
8815 node_done, node_percent = nres.payload
8816 all_done = all_done and node_done
8817 if node_percent is not None:
8818 min_percent = min(min_percent, node_percent)
8819 if not all_done:
8820 if min_percent < 100:
8821 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8822 time.sleep(2)
8824 def _EnsureSecondary(self, node):
8825 """Demote a node to secondary.
8828 self.feedback_fn("* switching node %s to secondary mode" % node)
8830 for dev in self.instance.disks:
8831 self.cfg.SetDiskID(dev, node)
8833 result = self.rpc.call_blockdev_close(node, self.instance.name,
8834 self.instance.disks)
8835 result.Raise("Cannot change disk to secondary on node %s" % node)
8837 def _GoStandalone(self):
8838 """Disconnect from the network.
8841 self.feedback_fn("* changing into standalone mode")
8842 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8843 self.instance.disks)
8844 for node, nres in result.items():
8845 nres.Raise("Cannot disconnect disks node %s" % node)
8847 def _GoReconnect(self, multimaster):
8848 """Reconnect to the network.
8854 msg = "single-master"
8855 self.feedback_fn("* changing disks into %s mode" % msg)
8856 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8857 (self.instance.disks, self.instance),
8858 self.instance.name, multimaster)
8859 for node, nres in result.items():
8860 nres.Raise("Cannot change disks config on node %s" % node)
8862 def _ExecCleanup(self):
8863 """Try to cleanup after a failed migration.
8865 The cleanup is done by:
8866 - check that the instance is running only on one node
8867 (and update the config if needed)
8868 - change disks on its secondary node to secondary
8869 - wait until disks are fully synchronized
8870 - disconnect from the network
8871 - change disks into single-master mode
8872 - wait again until disks are fully synchronized
8875 instance = self.instance
8876 target_node = self.target_node
8877 source_node = self.source_node
8879 # check running on only one node
8880 self.feedback_fn("* checking where the instance actually runs"
8881 " (if this hangs, the hypervisor might be in"
8883 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8884 for node, result in ins_l.items():
8885 result.Raise("Can't contact node %s" % node)
8887 runningon_source = instance.name in ins_l[source_node].payload
8888 runningon_target = instance.name in ins_l[target_node].payload
8890 if runningon_source and runningon_target:
8891 raise errors.OpExecError("Instance seems to be running on two nodes,"
8892 " or the hypervisor is confused; you will have"
8893 " to ensure manually that it runs only on one"
8894 " and restart this operation")
8896 if not (runningon_source or runningon_target):
8897 raise errors.OpExecError("Instance does not seem to be running at all;"
8898 " in this case it's safer to repair by"
8899 " running 'gnt-instance stop' to ensure disk"
8900 " shutdown, and then restarting it")
8902 if runningon_target:
8903 # the migration has actually succeeded, we need to update the config
8904 self.feedback_fn("* instance running on secondary node (%s),"
8905 " updating config" % target_node)
8906 instance.primary_node = target_node
8907 self.cfg.Update(instance, self.feedback_fn)
8908 demoted_node = source_node
8910 self.feedback_fn("* instance confirmed to be running on its"
8911 " primary node (%s)" % source_node)
8912 demoted_node = target_node
8914 if instance.disk_template in constants.DTS_INT_MIRROR:
8915 self._EnsureSecondary(demoted_node)
8916 try:
8917 self._WaitUntilSync()
8918 except errors.OpExecError:
8919 # we ignore here errors, since if the device is standalone, it
8920 # won't be able to sync
8921 pass
8922 self._GoStandalone()
8923 self._GoReconnect(False)
8924 self._WaitUntilSync()
8926 self.feedback_fn("* done")
8928 def _RevertDiskStatus(self):
8929 """Try to revert the disk status after a failed migration.
8932 target_node = self.target_node
8933 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8934 return
8936 try:
8937 self._EnsureSecondary(target_node)
8938 self._GoStandalone()
8939 self._GoReconnect(False)
8940 self._WaitUntilSync()
8941 except errors.OpExecError, err:
8942 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8943 " please try to recover the instance manually;"
8944 " error '%s'" % str(err))
8946 def _AbortMigration(self):
8947 """Call the hypervisor code to abort a started migration.
8950 instance = self.instance
8951 target_node = self.target_node
8952 source_node = self.source_node
8953 migration_info = self.migration_info
8955 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8956 instance,
8957 migration_info,
8958 False)
8959 abort_msg = abort_result.fail_msg
8960 if abort_msg:
8961 logging.error("Aborting migration failed on target node %s: %s",
8962 target_node, abort_msg)
8963 # Don't raise an exception here, as we still have to try to revert the
8964 # disk status, even if this step failed.
8966 abort_result = self.rpc.call_instance_finalize_migration_src(
8967 source_node, instance, False, self.live)
8968 abort_msg = abort_result.fail_msg
8970 logging.error("Aborting migration failed on source node %s: %s",
8971 source_node, abort_msg)
8973 def _ExecMigration(self):
8974 """Migrate an instance.
8976 The migrate is done by:
8977 - change the disks into dual-master mode
8978 - wait until disks are fully synchronized again
8979 - migrate the instance
8980 - change disks on the new secondary node (the old primary) to secondary
8981 - wait until disks are fully synchronized
8982 - change disks into single-master mode
8985 instance = self.instance
8986 target_node = self.target_node
8987 source_node = self.source_node
8989 # Check for hypervisor version mismatch and warn the user.
8990 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8991 None, [self.instance.hypervisor], False)
8992 for ninfo in nodeinfo.values():
8993 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8994 ninfo.node)
8995 (_, _, (src_info, )) = nodeinfo[source_node].payload
8996 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8998 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8999 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9000 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9001 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9002 if src_version != dst_version:
9003 self.feedback_fn("* warning: hypervisor version mismatch between"
9004 " source (%s) and target (%s) node" %
9005 (src_version, dst_version))
9007 self.feedback_fn("* checking disk consistency between source and target")
9008 for (idx, dev) in enumerate(instance.disks):
9009 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9010 raise errors.OpExecError("Disk %s is degraded or not fully"
9011 " synchronized on target node,"
9012 " aborting migration" % idx)
9014 if self.current_mem > self.tgt_free_mem:
9015 if not self.allow_runtime_changes:
9016 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9017 " free memory to fit instance %s on target"
9018 " node %s (have %dMB, need %dMB)" %
9019 (instance.name, target_node,
9020 self.tgt_free_mem, self.current_mem))
9021 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9022 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9023 instance,
9024 self.tgt_free_mem)
9025 rpcres.Raise("Cannot modify instance runtime memory")
9027 # First get the migration information from the remote node
9028 result = self.rpc.call_migration_info(source_node, instance)
9029 msg = result.fail_msg
9030 if msg:
9031 log_err = ("Failed fetching source migration information from %s: %s" %
9032 (source_node, msg))
9033 logging.error(log_err)
9034 raise errors.OpExecError(log_err)
9036 self.migration_info = migration_info = result.payload
9038 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9039 # Then switch the disks to master/master mode
9040 self._EnsureSecondary(target_node)
9041 self._GoStandalone()
9042 self._GoReconnect(True)
9043 self._WaitUntilSync()
9045 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9046 result = self.rpc.call_accept_instance(target_node,
9047 instance,
9048 migration_info,
9049 self.nodes_ip[target_node])
9051 msg = result.fail_msg
9053 logging.error("Instance pre-migration failed, trying to revert"
9054 " disk status: %s", msg)
9055 self.feedback_fn("Pre-migration failed, aborting")
9056 self._AbortMigration()
9057 self._RevertDiskStatus()
9058 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9059 (instance.name, msg))
9061 self.feedback_fn("* migrating instance to %s" % target_node)
9062 result = self.rpc.call_instance_migrate(source_node, instance,
9063 self.nodes_ip[target_node],
9064 self.live)
9065 msg = result.fail_msg
9066 if msg:
9067 logging.error("Instance migration failed, trying to revert"
9068 " disk status: %s", msg)
9069 self.feedback_fn("Migration failed, aborting")
9070 self._AbortMigration()
9071 self._RevertDiskStatus()
9072 raise errors.OpExecError("Could not migrate instance %s: %s" %
9073 (instance.name, msg))
9075 self.feedback_fn("* starting memory transfer")
9076 last_feedback = time.time()
9077 while True:
9078 result = self.rpc.call_instance_get_migration_status(source_node,
9079 instance)
9080 msg = result.fail_msg
9081 ms = result.payload # MigrationStatus instance
9082 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9083 logging.error("Instance migration failed, trying to revert"
9084 " disk status: %s", msg)
9085 self.feedback_fn("Migration failed, aborting")
9086 self._AbortMigration()
9087 self._RevertDiskStatus()
9089 msg = "hypervisor returned failure"
9090 raise errors.OpExecError("Could not migrate instance %s: %s" %
9091 (instance.name, msg))
9093 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9094 self.feedback_fn("* memory transfer complete")
9097 if (utils.TimeoutExpired(last_feedback,
9098 self._MIGRATION_FEEDBACK_INTERVAL) and
9099 ms.transferred_ram is not None):
9100 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9101 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9102 last_feedback = time.time()
9104 time.sleep(self._MIGRATION_POLL_INTERVAL)
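# The loop above polls the hypervisor every _MIGRATION_POLL_INTERVAL second
# but emits progress at most every _MIGRATION_FEEDBACK_INTERVAL seconds,
# keeping job feedback readable for large memory transfers.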
9106 result = self.rpc.call_instance_finalize_migration_src(source_node,
9107 instance,
9108 True,
9109 self.live)
9110 msg = result.fail_msg
9111 if msg:
9112 logging.error("Instance migration succeeded, but finalization failed"
9113 " on the source node: %s", msg)
9114 raise errors.OpExecError("Could not finalize instance migration: %s" %
9117 instance.primary_node = target_node
9119 # distribute new instance config to the other nodes
9120 self.cfg.Update(instance, self.feedback_fn)
9122 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9123 instance,
9124 migration_info,
9125 True)
9126 msg = result.fail_msg
9127 if msg:
9128 logging.error("Instance migration succeeded, but finalization failed"
9129 " on the target node: %s", msg)
9130 raise errors.OpExecError("Could not finalize instance migration: %s" %
9133 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9134 self._EnsureSecondary(source_node)
9135 self._WaitUntilSync()
9136 self._GoStandalone()
9137 self._GoReconnect(False)
9138 self._WaitUntilSync()
9140 # If the instance's disk template is `rbd' or `ext' and there was a
9141 # successful migration, unmap the device from the source node.
9142 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9143 disks = _ExpandCheckDisks(instance, instance.disks)
9144 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9145 for disk in disks:
9146 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9147 msg = result.fail_msg
9148 if msg:
9149 logging.error("Migration was successful, but couldn't unmap the"
9150 " block device %s on source node %s: %s",
9151 disk.iv_name, source_node, msg)
9152 logging.error("You need to unmap the device %s manually on %s",
9153 disk.iv_name, source_node)
9155 self.feedback_fn("* done")
9157 def _ExecFailover(self):
9158 """Failover an instance.
9160 The failover is done by shutting it down on its present node and
9161 starting it on the secondary.
9164 instance = self.instance
9165 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9167 source_node = instance.primary_node
9168 target_node = self.target_node
9170 if instance.admin_state == constants.ADMINST_UP:
9171 self.feedback_fn("* checking disk consistency between source and target")
9172 for (idx, dev) in enumerate(instance.disks):
9173 # for drbd, these are drbd over lvm
9174 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9175 False):
9176 if primary_node.offline:
9177 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9178 " target node %s" %
9179 (primary_node.name, idx, target_node))
9180 elif not self.ignore_consistency:
9181 raise errors.OpExecError("Disk %s is degraded on target node,"
9182 " aborting failover" % idx)
9184 self.feedback_fn("* not checking disk consistency as instance is not"
9187 self.feedback_fn("* shutting down instance on source node")
9188 logging.info("Shutting down instance %s on node %s",
9189 instance.name, source_node)
9191 result = self.rpc.call_instance_shutdown(source_node, instance,
9192 self.shutdown_timeout)
9193 msg = result.fail_msg
9194 if msg:
9195 if self.ignore_consistency or primary_node.offline:
9196 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9197 " proceeding anyway; please make sure node"
9198 " %s is down; error details: %s",
9199 instance.name, source_node, source_node, msg)
9201 raise errors.OpExecError("Could not shutdown instance %s on"
9203 (instance.name, source_node, msg))
9205 self.feedback_fn("* deactivating the instance's disks on source node")
9206 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9207 raise errors.OpExecError("Can't shut down the instance's disks")
9209 instance.primary_node = target_node
9210 # distribute new instance config to the other nodes
9211 self.cfg.Update(instance, self.feedback_fn)
9213 # Only start the instance if it's marked as up
9214 if instance.admin_state == constants.ADMINST_UP:
9215 self.feedback_fn("* activating the instance's disks on target node %s" %
9217 logging.info("Starting instance %s on node %s",
9218 instance.name, target_node)
9220 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9221 ignore_secondaries=True)
9222 if not disks_ok:
9223 _ShutdownInstanceDisks(self.lu, instance)
9224 raise errors.OpExecError("Can't activate the instance's disks")
9226 self.feedback_fn("* starting the instance on the target node %s" %
9228 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9230 msg = result.fail_msg
9232 _ShutdownInstanceDisks(self.lu, instance)
9233 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9234 (instance.name, target_node, msg))
9236 def Exec(self, feedback_fn):
9237 """Perform the migration.
9240 self.feedback_fn = feedback_fn
9241 self.source_node = self.instance.primary_node
9243 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9244 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9245 self.target_node = self.instance.secondary_nodes[0]
9246 # Otherwise self.target_node has been populated either
9247 # directly, or through an iallocator.
9249 self.all_nodes = [self.source_node, self.target_node]
9250 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9251 in self.cfg.GetMultiNodeInfo(self.all_nodes))
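# nodes_ip maps each node name to its secondary (replication) IP; DRBD
# traffic and the migration RPCs above are addressed to that network rather
# than to the primary IP.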
9254 feedback_fn("Failover instance %s" % self.instance.name)
9255 self._ExecFailover()
9257 feedback_fn("Migrating instance %s" % self.instance.name)
9260 return self._ExecCleanup()
9262 return self._ExecMigration()
9265 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9266 force_open):
9267 """Wrapper around L{_CreateBlockDevInner}.
9269 This method annotates the root device first.
9272 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9273 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9274 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9275 force_open, excl_stor)
9278 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9279 info, force_open, excl_stor):
9280 """Create a tree of block devices on a given node.
9282 If this device type has to be created on secondaries, create it and
9285 If not, just recurse to children keeping the same 'force' value.
9287 @attention: The device has to be annotated already.
9289 @param lu: the lu on whose behalf we execute
9290 @param node: the node on which to create the device
9291 @type instance: L{objects.Instance}
9292 @param instance: the instance which owns the device
9293 @type device: L{objects.Disk}
9294 @param device: the device to create
9295 @type force_create: boolean
9296 @param force_create: whether to force creation of this device; this
9297 will be changed to True whenever we find a device which has
9298 the CreateOnSecondary() attribute
9299 @param info: the extra 'metadata' we should attach to the device
9300 (this will be represented as a LVM tag)
9301 @type force_open: boolean
9302 @param force_open: this parameter will be passed to the
9303 L{backend.BlockdevCreate} function where it specifies
9304 whether we run on primary or not, and it affects both
9305 the child assembly and the device's own Open() execution
9306 @type excl_stor: boolean
9307 @param excl_stor: Whether exclusive_storage is active for the node
9310 if device.CreateOnSecondary():
9311 force_create = True
9313 if device.children:
9314 for child in device.children:
9315 _CreateBlockDevInner(lu, node, instance, child, force_create,
9316 info, force_open, excl_stor)
9318 if not force_create:
9319 return
9321 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9322 excl_stor)
9325 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9326 excl_stor):
9327 """Create a single block device on a given node.
9329 This will not recurse over children of the device, so they must be
9332 @param lu: the lu on whose behalf we execute
9333 @param node: the node on which to create the device
9334 @type instance: L{objects.Instance}
9335 @param instance: the instance which owns the device
9336 @type device: L{objects.Disk}
9337 @param device: the device to create
9338 @param info: the extra 'metadata' we should attach to the device
9339 (this will be represented as a LVM tag)
9340 @type force_open: boolean
9341 @param force_open: this parameter will be passed to the
9342 L{backend.BlockdevCreate} function where it specifies
9343 whether we run on primary or not, and it affects both
9344 the child assembly and the device's own Open() execution
9345 @type excl_stor: boolean
9346 @param excl_stor: Whether exclusive_storage is active for the node
9349 lu.cfg.SetDiskID(device, node)
9350 result = lu.rpc.call_blockdev_create(node, device, device.size,
9351 instance.name, force_open, info,
9352 excl_stor)
9353 result.Raise("Can't create block device %s on"
9354 " node %s for instance %s" % (device, node, instance.name))
9355 if device.physical_id is None:
9356 device.physical_id = result.payload
9359 def _GenerateUniqueNames(lu, exts):
9360 """Generate a suitable LV name.
9362 This will generate a logical volume name for the given instance.
9365 results = []
9366 for val in exts:
9367 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9368 results.append("%s%s" % (new_id, val))
9369 return results
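# Example (illustrative; the UUIDs below are made up): a caller doing
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# gets back names such as
#   ["c398e0c8-3b12-41e0-b328-5a4825dd4338.disk0",
#    "f1dd1cfc-6c28-44f0-b4b4-a2fca75e67a8.disk1"]
# which the DRBD code below extends with "_data"/"_meta" suffixes.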
9372 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9373 iv_name, p_minor, s_minor):
9374 """Generate a drbd8 device complete with its children.
9377 assert len(vgnames) == len(names) == 2
9378 port = lu.cfg.AllocatePort()
9379 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9381 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9382 logical_id=(vgnames[0], names[0]),
9383 params={})
9384 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9385 size=constants.DRBD_META_SIZE,
9386 logical_id=(vgnames[1], names[1]),
9387 params={})
9388 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9389 logical_id=(primary, secondary, port,
9390 p_minor, s_minor,
9391 shared_secret),
9392 children=[dev_data, dev_meta],
9393 iv_name=iv_name, params={})
9394 return drbd_dev
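# The resulting device tree (a sketch; sizes, minors and names are whatever
# the caller passed in) looks like:
#
#   drbd_dev (LD_DRBD8, size)
#    |- dev_data (LD_LV, size)            -> (vgnames[0], names[0])
#    `- dev_meta (LD_LV, DRBD_META_SIZE)  -> (vgnames[1], names[1])
#
# i.e. one DRBD8 device whose children are the data LV and the metadata LV.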
9397 _DISK_TEMPLATE_NAME_PREFIX = {
9398 constants.DT_PLAIN: "",
9399 constants.DT_RBD: ".rbd",
9400 constants.DT_EXT: ".ext",
9404 _DISK_TEMPLATE_DEVICE_TYPE = {
9405 constants.DT_PLAIN: constants.LD_LV,
9406 constants.DT_FILE: constants.LD_FILE,
9407 constants.DT_SHARED_FILE: constants.LD_FILE,
9408 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9409 constants.DT_RBD: constants.LD_RBD,
9410 constants.DT_EXT: constants.LD_EXT,
9414 def _GenerateDiskTemplate(
9415 lu, template_name, instance_name, primary_node, secondary_nodes,
9416 disk_info, file_storage_dir, file_driver, base_index,
9417 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9418 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9419 """Generate the entire disk layout for a given template type.
9422 vgname = lu.cfg.GetVGName()
9423 disk_count = len(disk_info)
9424 disks = []
9426 if template_name == constants.DT_DISKLESS:
9427 pass
9428 elif template_name == constants.DT_DRBD8:
9429 if len(secondary_nodes) != 1:
9430 raise errors.ProgrammerError("Wrong template configuration")
9431 remote_node = secondary_nodes[0]
9432 minors = lu.cfg.AllocateDRBDMinor(
9433 [primary_node, remote_node] * len(disk_info), instance_name)
9435 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9436 full_disk_params)
9437 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9439 names = []
9440 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9441 for i in range(disk_count)]):
9442 names.append(lv_prefix + "_data")
9443 names.append(lv_prefix + "_meta")
9444 for idx, disk in enumerate(disk_info):
9445 disk_index = idx + base_index
9446 data_vg = disk.get(constants.IDISK_VG, vgname)
9447 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9448 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9449 disk[constants.IDISK_SIZE],
9451 names[idx * 2:idx * 2 + 2],
9452 "disk/%d" % disk_index,
9453 minors[idx * 2], minors[idx * 2 + 1])
9454 disk_dev.mode = disk[constants.IDISK_MODE]
9455 disks.append(disk_dev)
9456 else:
9457 if secondary_nodes:
9458 raise errors.ProgrammerError("Wrong template configuration")
9460 if template_name == constants.DT_FILE:
9461 _req_file_storage()
9462 elif template_name == constants.DT_SHARED_FILE:
9463 _req_shr_file_storage()
9465 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9466 if name_prefix is None:
9467 names = None
9468 else:
9469 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9470 (name_prefix, base_index + i)
9471 for i in range(disk_count)])
9473 if template_name == constants.DT_PLAIN:
9475 def logical_id_fn(idx, _, disk):
9476 vg = disk.get(constants.IDISK_VG, vgname)
9477 return (vg, names[idx])
9479 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9480 logical_id_fn = \
9481 lambda _, disk_index, disk: (file_driver,
9482 "%s/disk%d" % (file_storage_dir,
9483 disk_index))
9484 elif template_name == constants.DT_BLOCK:
9485 logical_id_fn = \
9486 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9487 disk[constants.IDISK_ADOPT])
9488 elif template_name == constants.DT_RBD:
9489 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9490 elif template_name == constants.DT_EXT:
9491 def logical_id_fn(idx, _, disk):
9492 provider = disk.get(constants.IDISK_PROVIDER, None)
9493 if provider is None:
9494 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9495 " not found", constants.DT_EXT,
9496 constants.IDISK_PROVIDER)
9497 return (provider, names[idx])
9499 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9501 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9503 for idx, disk in enumerate(disk_info):
9504 params = {}
9505 # Only for the Ext template add disk_info to params
9506 if template_name == constants.DT_EXT:
9507 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9508 for key in disk:
9509 if key not in constants.IDISK_PARAMS:
9510 params[key] = disk[key]
9511 disk_index = idx + base_index
9512 size = disk[constants.IDISK_SIZE]
9513 feedback_fn("* disk %s, size %s" %
9514 (disk_index, utils.FormatUnit(size, "h")))
9515 disks.append(objects.Disk(dev_type=dev_type, size=size,
9516 logical_id=logical_id_fn(idx, disk_index, disk),
9517 iv_name="disk/%d" % disk_index,
9518 mode=disk[constants.IDISK_MODE],
9519 params=params))
9521 return disks
9524 def _GetInstanceInfoText(instance):
9525 """Compute that text that should be added to the disk's metadata.
9528 return "originstname+%s" % instance.name
9531 def _CalcEta(time_taken, written, total_size):
9532 """Calculates the ETA based on size written and total size.
9534 @param time_taken: The time taken so far
9535 @param written: amount written so far
9536 @param total_size: The total size of data to be written
9537 @return: The remaining time in seconds
9540 avg_time = time_taken / float(written)
9541 return (total_size - written) * avg_time
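# Worked example (made-up numbers): _CalcEta assumes a constant write rate,
# i.e. remaining_time = (total_size - written) * (time_taken / written).
def _ExampleCalcEtaUsage():
  """Illustrative usage sketch only; not called by the module itself.

  """
  # 512 units written in 30s; 1536 units remain -> 90.0 seconds ETA
  return _CalcEta(30.0, 512, 2048)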
9544 def _WipeDisks(lu, instance, disks=None):
9545 """Wipes instance disks.
9547 @type lu: L{LogicalUnit}
9548 @param lu: the logical unit on whose behalf we execute
9549 @type instance: L{objects.Instance}
9550 @param instance: the instance whose disks we should wipe
9551 @return: the success of the wipe
9554 node = instance.primary_node
9556 if disks is None:
9557 disks = [(idx, disk, 0)
9558 for (idx, disk) in enumerate(instance.disks)]
9560 for (_, device, _) in disks:
9561 lu.cfg.SetDiskID(device, node)
9563 logging.info("Pausing synchronization of disks of instance '%s'",
9565 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9566 (map(compat.snd, disks),
9567 instance),
9568 True)
9569 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9571 for idx, success in enumerate(result.payload):
9573 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9574 " failed", idx, instance.name)
9576 try:
9577 for (idx, device, offset) in disks:
9578 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9579 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9580 wipe_chunk_size = \
9581 int(min(constants.MAX_WIPE_CHUNK,
9582 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9583 size = device.size
9584 last_output = 0
9586 start_time = time.time()
9588 if offset == 0:
9589 info_text = ""
9590 else:
9591 info_text = (" (from %s to %s)" %
9592 (utils.FormatUnit(offset, "h"),
9593 utils.FormatUnit(size, "h")))
9595 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9597 logging.info("Wiping disk %d for instance %s on node %s using"
9598 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9600 while offset < size:
9601 wipe_size = min(wipe_chunk_size, size - offset)
9603 logging.debug("Wiping disk %d, offset %s, chunk %s",
9604 idx, offset, wipe_size)
9606 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9607 wipe_size)
9608 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9609 (idx, offset, wipe_size))
9610 offset += wipe_size
9612 now = time.time()
9613 if now - last_output >= 60:
9614 eta = _CalcEta(now - start_time, offset, size)
9615 lu.LogInfo(" - done: %.1f%% ETA: %s",
9616 offset / float(size) * 100, utils.FormatSeconds(eta))
9617 last_output = now
9619 logging.info("Resuming synchronization of disks for instance '%s'",
9622 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9623 (map(compat.snd, disks),
9624 instance),
9625 False)
9627 if result.fail_msg:
9628 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9629 node, result.fail_msg)
9631 for idx, success in enumerate(result.payload):
9633 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9634 " failed", idx, instance.name)
9637 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9638 """Create all disks for an instance.
9640 This abstracts away some work from AddInstance.
9642 @type lu: L{LogicalUnit}
9643 @param lu: the logical unit on whose behalf we execute
9644 @type instance: L{objects.Instance}
9645 @param instance: the instance whose disks we should create
9647 @param to_skip: list of indices to skip
9648 @type target_node: string
9649 @param target_node: if passed, overrides the target node for creation
9651 @return: the success of the creation
9654 info = _GetInstanceInfoText(instance)
9655 if target_node is None:
9656 pnode = instance.primary_node
9657 all_nodes = instance.all_nodes
9658 else:
9659 pnode = target_node
9660 all_nodes = [pnode]
9662 if instance.disk_template in constants.DTS_FILEBASED:
9663 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9664 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9666 result.Raise("Failed to create directory '%s' on"
9667 " node %s" % (file_storage_dir, pnode))
9669 # Note: this needs to be kept in sync with adding of disks in
9670 # LUInstanceSetParams
9671 for idx, device in enumerate(instance.disks):
9672 if to_skip and idx in to_skip:
9674 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9676 for node in all_nodes:
9677 f_create = node == pnode
9678 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9681 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9682 """Remove all disks for an instance.
9684 This abstracts away some work from `AddInstance()` and
9685 `RemoveInstance()`. Note that in case some of the devices couldn't
9686 be removed, the removal will continue with the other ones (compare
9687 with `_CreateDisks()`).
9689 @type lu: L{LogicalUnit}
9690 @param lu: the logical unit on whose behalf we execute
9691 @type instance: L{objects.Instance}
9692 @param instance: the instance whose disks we should remove
9693 @type target_node: string
9694 @param target_node: used to override the node on which to remove the disks
9696 @return: the success of the removal
9699 logging.info("Removing block devices for instance %s", instance.name)
9701 all_result = True
9702 ports_to_release = set()
9703 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9704 for (idx, device) in enumerate(anno_disks):
9705 if target_node:
9706 edata = [(target_node, device)]
9707 else:
9708 edata = device.ComputeNodeTree(instance.primary_node)
9709 for node, disk in edata:
9710 lu.cfg.SetDiskID(disk, node)
9711 result = lu.rpc.call_blockdev_remove(node, disk)
9713 lu.LogWarning("Could not remove disk %s on node %s,"
9714 " continuing anyway: %s", idx, node, result.fail_msg)
9715 if not (result.offline and node != instance.primary_node):
9716 all_result = False
9718 # if this is a DRBD disk, return its port to the pool
9719 if device.dev_type in constants.LDS_DRBD:
9720 ports_to_release.add(device.logical_id[2])
9722 if all_result or ignore_failures:
9723 for port in ports_to_release:
9724 lu.cfg.AddTcpUdpPort(port)
9726 if instance.disk_template in constants.DTS_FILEBASED:
9727 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9728 if target_node:
9729 tgt = target_node
9730 else:
9731 tgt = instance.primary_node
9732 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9734 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9735 file_storage_dir, instance.primary_node, result.fail_msg)
9741 def _ComputeDiskSizePerVG(disk_template, disks):
9742 """Compute disk size requirements in the volume group
9745 def _compute(disks, payload):
9746 """Universal algorithm.
9748 """
9749 vgs = {}
9750 for disk in disks:
9751 vgs[disk[constants.IDISK_VG]] = \
9752 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9753 return vgs
9756 # Required free disk space as a function of disk and swap space
9757 req_size_dict = {
9758 constants.DT_DISKLESS: {},
9759 constants.DT_PLAIN: _compute(disks, 0),
9760 # 128 MB are added for drbd metadata for each disk
9761 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9762 constants.DT_FILE: {},
9763 constants.DT_SHARED_FILE: {},
9764 }
9766 if disk_template not in req_size_dict:
9767 raise errors.ProgrammerError("Disk template '%s' size requirement"
9768 " is unknown" % disk_template)
9770 return req_size_dict[disk_template]
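# Illustrative sketch (hypothetical disk specs): for two DRBD8 disks in the
# volume group "xenvg", the required space is the sum of the disk sizes plus
# DRBD_META_SIZE per disk, keyed by VG name.
def _ExampleComputeDiskSizePerVG():
  """Illustrative sketch only; not called by the module itself.

  """
  disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
           {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}]
  # -> {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE}
  return _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)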
9773 def _FilterVmNodes(lu, nodenames):
9774 """Filters out non-vm_capable nodes from a list.
9776 @type lu: L{LogicalUnit}
9777 @param lu: the logical unit for which we check
9778 @type nodenames: list
9779 @param nodenames: the list of nodes on which we should check
9781 @return: the list of vm-capable nodes
9784 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9785 return [name for name in nodenames if name not in vm_nodes]
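# Example (hypothetical node names): if "node3" is marked vm_capable=False
# in the cluster configuration, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node2"]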
9788 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9789 """Hypervisor parameter validation.
9791 This function abstracts the hypervisor parameter validation to be
9792 used in both instance create and instance modify.
9794 @type lu: L{LogicalUnit}
9795 @param lu: the logical unit for which we check
9796 @type nodenames: list
9797 @param nodenames: the list of nodes on which we should check
9798 @type hvname: string
9799 @param hvname: the name of the hypervisor we should use
9800 @type hvparams: dict
9801 @param hvparams: the parameters which we need to check
9802 @raise errors.OpPrereqError: if the parameters are not valid
9805 nodenames = _FilterVmNodes(lu, nodenames)
9807 cluster = lu.cfg.GetClusterInfo()
9808 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9810 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9811 for node in nodenames:
9812 info = hvinfo[node]
9813 if info.offline:
9814 continue
9815 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9818 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9819 """OS parameters validation.
9821 @type lu: L{LogicalUnit}
9822 @param lu: the logical unit for which we check
9823 @type required: boolean
9824 @param required: whether the validation should fail if the OS is not found
9826 @type nodenames: list
9827 @param nodenames: the list of nodes on which we should check
9828 @type osname: string
9829 @param osname: the name of the OS we should use
9830 @type osparams: dict
9831 @param osparams: the parameters which we need to check
9832 @raise errors.OpPrereqError: if the parameters are not valid
9835 nodenames = _FilterVmNodes(lu, nodenames)
9836 result = lu.rpc.call_os_validate(nodenames, required, osname,
9837 [constants.OS_VALIDATE_PARAMETERS],
9838 osparams)
9839 for node, nres in result.items():
9840 # we don't check for offline cases since this should be run only
9841 # against the master node and/or an instance's nodes
9842 nres.Raise("OS Parameters validation failed on node %s" % node)
9843 if not nres.payload:
9844 lu.LogInfo("OS %s not found on node %s, validation skipped",
9848 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9849 """Wrapper around IAReqInstanceAlloc.
9851 @param op: The instance opcode
9852 @param disks: The computed disks
9853 @param nics: The computed nics
9854 @param beparams: The full filled beparams
9855 @param node_whitelist: List of nodes which should appear as online to the
9856 allocator (unless the node is already marked offline)
9858 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9861 spindle_use = beparams[constants.BE_SPINDLE_USE]
9862 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9863 disk_template=op.disk_template,
9864 tags=op.tags,
9865 os=op.os_type,
9866 vcpus=beparams[constants.BE_VCPUS],
9867 memory=beparams[constants.BE_MAXMEM],
9868 spindle_use=spindle_use,
9869 disks=disks,
9870 nics=[n.ToDict() for n in nics],
9871 hypervisor=op.hypervisor,
9872 node_whitelist=node_whitelist)
9875 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9876 """Computes the nics.
9878 @param op: The instance opcode
9879 @param cluster: Cluster configuration object
9880 @param default_ip: The default ip to assign
9881 @param cfg: An instance of the configuration object
9882 @param ec_id: Execution context ID
9884 @returns: The built up NICs
9887 nics = []
9888 for nic in op.nics:
9889 nic_mode_req = nic.get(constants.INIC_MODE, None)
9890 nic_mode = nic_mode_req
9891 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9892 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9894 net = nic.get(constants.INIC_NETWORK, None)
9895 link = nic.get(constants.NIC_LINK, None)
9896 ip = nic.get(constants.INIC_IP, None)
9898 if net is None or net.lower() == constants.VALUE_NONE:
9899 net = None
9900 else:
9901 if nic_mode_req is not None or link is not None:
9902 raise errors.OpPrereqError("If network is given, no mode or link"
9903 " is allowed to be passed",
9906 # ip validity checks
9907 if ip is None or ip.lower() == constants.VALUE_NONE:
9908 nic_ip = None
9909 elif ip.lower() == constants.VALUE_AUTO:
9910 if not op.name_check:
9911 raise errors.OpPrereqError("IP address set to auto but name checks"
9912 " have been skipped",
9916 # We defer pool operations until later, so that the iallocator has
9917 # filled in the instance's node(s)
9918 if ip.lower() == constants.NIC_IP_POOL:
9919 if net is None:
9920 raise errors.OpPrereqError("if ip=pool, parameter network"
9921 " must be passed too",
9924 elif not netutils.IPAddress.IsValid(ip):
9925 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9930 # TODO: check the ip address for uniqueness
9931 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9932 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9935 # MAC address verification
9936 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9937 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9938 mac = utils.NormalizeAndValidateMac(mac)
9940 try:
9941 # TODO: We need to factor this out
9942 cfg.ReserveMAC(mac, ec_id)
9943 except errors.ReservationError:
9944 raise errors.OpPrereqError("MAC address %s already in use"
9945 " in cluster" % mac,
9946 errors.ECODE_NOTUNIQUE)
9948 # Build nic parameters
9949 nicparams = {}
9950 if nic_mode_req:
9951 nicparams[constants.NIC_MODE] = nic_mode
9952 if link:
9953 nicparams[constants.NIC_LINK] = link
9955 check_params = cluster.SimpleFillNIC(nicparams)
9956 objects.NIC.CheckParameterSyntax(check_params)
9957 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9958 network=net, nicparams=nicparams))
9960 return nics
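# Illustrative sketch (hypothetical opcode values): a nic spec of
#   {constants.INIC_IP: "auto", constants.INIC_MAC: constants.VALUE_AUTO}
# with name checking enabled resolves the IP from the name lookup, leaves
# the MAC as "auto" for later generation and fills mode/link from the
# cluster's default nicparams.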
9963 def _ComputeDisks(op, default_vg):
9964 """Computes the instance disks.
9966 @param op: The instance opcode
9967 @param default_vg: The default_vg to assume
9969 @return: The computed disks
9973 for disk in op.disks:
9974 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9975 if mode not in constants.DISK_ACCESS_SET:
9976 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9977 mode, errors.ECODE_INVAL)
9978 size = disk.get(constants.IDISK_SIZE, None)
9979 if size is None:
9980 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9981 try:
9982 size = int(size)
9983 except (TypeError, ValueError):
9984 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9985 errors.ECODE_INVAL)
9987 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9988 if ext_provider and op.disk_template != constants.DT_EXT:
9989 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9990 " disk template, not %s" %
9991 (constants.IDISK_PROVIDER, constants.DT_EXT,
9992 op.disk_template), errors.ECODE_INVAL)
9994 data_vg = disk.get(constants.IDISK_VG, default_vg)
9995 new_disk = {
9996 constants.IDISK_SIZE: size,
9997 constants.IDISK_MODE: mode,
9998 constants.IDISK_VG: data_vg,
9999 }
10001 if constants.IDISK_METAVG in disk:
10002 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10003 if constants.IDISK_ADOPT in disk:
10004 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10006 # For extstorage, demand the `provider' option and add any
10007 # additional parameters (ext-params) to the dict
10008 if op.disk_template == constants.DT_EXT:
10009 if ext_provider:
10010 new_disk[constants.IDISK_PROVIDER] = ext_provider
10011 for key in disk:
10012 if key not in constants.IDISK_PARAMS:
10013 new_disk[key] = disk[key]
10014 else:
10015 raise errors.OpPrereqError("Missing provider for template '%s'" %
10016 constants.DT_EXT, errors.ECODE_INVAL)
10018 disks.append(new_disk)
10020 return disks
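# Illustrative sketch (hypothetical opcode): for an op carrying
#   disks=[{constants.IDISK_SIZE: 1024}]
# and default_vg="xenvg", the result is
#   [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR,
#     constants.IDISK_VG: "xenvg"}]
# i.e. a missing access mode and VG are filled in from the defaults.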
10023 def _ComputeFullBeParams(op, cluster):
10024 """Computes the full beparams.
10026 @param op: The instance opcode
10027 @param cluster: The cluster config object
10029 @return: The fully filled beparams
10032 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10033 for param, value in op.beparams.iteritems():
10034 if value == constants.VALUE_AUTO:
10035 op.beparams[param] = default_beparams[param]
10036 objects.UpgradeBeParams(op.beparams)
10037 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10038 return cluster.SimpleFillBE(op.beparams)
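# Illustrative sketch (hypothetical values): with op.beparams set to
#   {constants.BE_VCPUS: constants.VALUE_AUTO}
# the "auto" value is first replaced by the cluster default, the dict is
# upgraded and type-checked, and SimpleFillBE then merges in the remaining
# cluster-level defaults to produce the fully filled beparams.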
10041 def _CheckOpportunisticLocking(op):
10042 """Generate error if opportunistic locking is not possible.
10045 if op.opportunistic_locking and not op.iallocator:
10046 raise errors.OpPrereqError("Opportunistic locking is only available in"
10047 " combination with an instance allocator",
10048 errors.ECODE_INVAL)
10051 class LUInstanceCreate(LogicalUnit):
10052 """Create an instance.
10055 HPATH = "instance-add"
10056 HTYPE = constants.HTYPE_INSTANCE
10059 def CheckArguments(self):
10060 """Check arguments.
10063 # do not require name_check to ease forward/backward compatibility
10065 if self.op.no_install and self.op.start:
10066 self.LogInfo("No-installation mode selected, disabling startup")
10067 self.op.start = False
10068 # validate/normalize the instance name
10069 self.op.instance_name = \
10070 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10072 if self.op.ip_check and not self.op.name_check:
10073 # TODO: make the ip check more flexible and not depend on the name check
10074 raise errors.OpPrereqError("Cannot do IP address check without a name"
10075 " check", errors.ECODE_INVAL)
10077 # check nics' parameter names
10078 for nic in self.op.nics:
10079 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10081 # check disks. parameter names and consistent adopt/no-adopt strategy
10082 has_adopt = has_no_adopt = False
10083 for disk in self.op.disks:
10084 if self.op.disk_template != constants.DT_EXT:
10085 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10086 if constants.IDISK_ADOPT in disk:
10087 has_adopt = True
10088 else:
10089 has_no_adopt = True
10090 if has_adopt and has_no_adopt:
10091 raise errors.OpPrereqError("Either all disks are adopted or none is",
10092 errors.ECODE_INVAL)
10093 if has_adopt:
10094 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10095 raise errors.OpPrereqError("Disk adoption is not supported for the"
10096 " '%s' disk template" %
10097 self.op.disk_template,
10098 errors.ECODE_INVAL)
10099 if self.op.iallocator is not None:
10100 raise errors.OpPrereqError("Disk adoption not allowed with an"
10101 " iallocator script", errors.ECODE_INVAL)
10102 if self.op.mode == constants.INSTANCE_IMPORT:
10103 raise errors.OpPrereqError("Disk adoption not allowed for"
10104 " instance import", errors.ECODE_INVAL)
10105 else:
10106 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10107 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10108 " but no 'adopt' parameter given" %
10109 self.op.disk_template,
10110 errors.ECODE_INVAL)
10112 self.adopt_disks = has_adopt
10114 # instance name verification
10115 if self.op.name_check:
10116 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10117 self.op.instance_name = self.hostname1.name
10118 # used in CheckPrereq for ip ping check
10119 self.check_ip = self.hostname1.ip
10121 self.check_ip = None
10123 # file storage checks
10124 if (self.op.file_driver and
10125 self.op.file_driver not in constants.FILE_DRIVER):
10126 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10127 self.op.file_driver, errors.ECODE_INVAL)
10129 if self.op.disk_template == constants.DT_FILE:
10130 opcodes.RequireFileStorage()
10131 elif self.op.disk_template == constants.DT_SHARED_FILE:
10132 opcodes.RequireSharedFileStorage()
10134 ### Node/iallocator related checks
10135 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10137 if self.op.pnode is not None:
10138 if self.op.disk_template in constants.DTS_INT_MIRROR:
10139 if self.op.snode is None:
10140 raise errors.OpPrereqError("The networked disk templates need"
10141 " a mirror node", errors.ECODE_INVAL)
10142 elif self.op.snode:
10143 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10145 self.op.snode = None
10147 _CheckOpportunisticLocking(self.op)
10149 self._cds = _GetClusterDomainSecret()
10151 if self.op.mode == constants.INSTANCE_IMPORT:
10152 # On import force_variant must be True, because if we forced it at
10153 # initial install, our only chance when importing it back is that it
10154 # works again
10155 self.op.force_variant = True
10157 if self.op.no_install:
10158 self.LogInfo("No-installation mode has no effect during import")
10160 elif self.op.mode == constants.INSTANCE_CREATE:
10161 if self.op.os_type is None:
10162 raise errors.OpPrereqError("No guest OS specified",
10163 errors.ECODE_INVAL)
10164 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10165 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10166 " installation" % self.op.os_type,
10167 errors.ECODE_STATE)
10168 if self.op.disk_template is None:
10169 raise errors.OpPrereqError("No disk template specified",
10170 errors.ECODE_INVAL)
10172 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10173 # Check handshake to ensure both clusters have the same domain secret
10174 src_handshake = self.op.source_handshake
10175 if not src_handshake:
10176 raise errors.OpPrereqError("Missing source handshake",
10177 errors.ECODE_INVAL)
10179 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10180 src_handshake)
10181 if errmsg:
10182 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10183 errors.ECODE_INVAL)
10185 # Load and check source CA
10186 self.source_x509_ca_pem = self.op.source_x509_ca
10187 if not self.source_x509_ca_pem:
10188 raise errors.OpPrereqError("Missing source X509 CA",
10189 errors.ECODE_INVAL)
10191 try:
10192 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10193 self._cds)
10194 except OpenSSL.crypto.Error, err:
10195 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10196 (err, ), errors.ECODE_INVAL)
10198 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10199 if errcode is not None:
10200 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10201 errors.ECODE_INVAL)
10203 self.source_x509_ca = cert
10205 src_instance_name = self.op.source_instance_name
10206 if not src_instance_name:
10207 raise errors.OpPrereqError("Missing source instance name",
10208 errors.ECODE_INVAL)
10210 self.source_instance_name = \
10211 netutils.GetHostname(name=src_instance_name).name
10214 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10215 self.op.mode, errors.ECODE_INVAL)
10217 def ExpandNames(self):
10218 """ExpandNames for CreateInstance.
10220 Figure out the right locks for instance creation.
10223 self.needed_locks = {}
10225 instance_name = self.op.instance_name
10226 # this is just a preventive check, but someone might still add this
10227 # instance in the meantime, and creation will fail at lock-add time
10228 if instance_name in self.cfg.GetInstanceList():
10229 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10230 instance_name, errors.ECODE_EXISTS)
10232 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10234 if self.op.iallocator:
10235 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10236 # specifying a group on instance creation and then selecting nodes from
10238 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10239 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10241 if self.op.opportunistic_locking:
10242 self.opportunistic_locks[locking.LEVEL_NODE] = True
10243 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10245 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10246 nodelist = [self.op.pnode]
10247 if self.op.snode is not None:
10248 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10249 nodelist.append(self.op.snode)
10250 self.needed_locks[locking.LEVEL_NODE] = nodelist
10252 # in case of import lock the source node too
10253 if self.op.mode == constants.INSTANCE_IMPORT:
10254 src_node = self.op.src_node
10255 src_path = self.op.src_path
10257 if src_path is None:
10258 self.op.src_path = src_path = self.op.instance_name
10260 if src_node is None:
10261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10262 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10263 self.op.src_node = None
10264 if os.path.isabs(src_path):
10265 raise errors.OpPrereqError("Importing an instance from a path"
10266 " requires a source node option",
10267 errors.ECODE_INVAL)
10268 else:
10269 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10270 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10271 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10272 if not os.path.isabs(src_path):
10273 self.op.src_path = src_path = \
10274 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10276 self.needed_locks[locking.LEVEL_NODE_RES] = \
10277 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10279 def _RunAllocator(self):
10280 """Run the allocator based on input opcode.
10283 if self.op.opportunistic_locking:
10284 # Only consider nodes for which a lock is held
10285 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10287 node_whitelist = None
10289 #TODO Export network to iallocator so that it chooses a pnode
10290 # in a nodegroup that has the desired network connected to
10291 req = _CreateInstanceAllocRequest(self.op, self.disks,
10292 self.nics, self.be_full,
10294 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10296 ial.Run(self.op.iallocator)
10298 if not ial.success:
10299 # When opportunistic locks are used only a temporary failure is generated
10300 if self.op.opportunistic_locking:
10301 ecode = errors.ECODE_TEMP_NORES
10303 ecode = errors.ECODE_NORES
10305 raise errors.OpPrereqError("Can't compute nodes using"
10306 " iallocator '%s': %s" %
10307 (self.op.iallocator, ial.info),
10310 self.op.pnode = ial.result[0]
10311 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10312 self.op.instance_name, self.op.iallocator,
10313 utils.CommaJoin(ial.result))
10315 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10317 if req.RequiredNodes() == 2:
10318 self.op.snode = ial.result[1]
10320 def BuildHooksEnv(self):
10321 """Build hooks env.
10323 This runs on master, primary and secondary nodes of the instance.
10327 "ADD_MODE": self.op.mode,
10329 if self.op.mode == constants.INSTANCE_IMPORT:
10330 env["SRC_NODE"] = self.op.src_node
10331 env["SRC_PATH"] = self.op.src_path
10332 env["SRC_IMAGES"] = self.src_images
10334 env.update(_BuildInstanceHookEnv(
10335 name=self.op.instance_name,
10336 primary_node=self.op.pnode,
10337 secondary_nodes=self.secondaries,
10338 status=self.op.start,
10339 os_type=self.op.os_type,
10340 minmem=self.be_full[constants.BE_MINMEM],
10341 maxmem=self.be_full[constants.BE_MAXMEM],
10342 vcpus=self.be_full[constants.BE_VCPUS],
10343 nics=_NICListToTuple(self, self.nics),
10344 disk_template=self.op.disk_template,
10345 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10346 for d in self.disks],
10349 hypervisor_name=self.op.hypervisor,
10355 def BuildHooksNodes(self):
10356 """Build hooks nodes.
10359 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10360 return (nl, nl)
10362 def _ReadExportInfo(self):
10363 """Reads the export information from disk.
10365 It will override the opcode source node and path with the actual
10366 information, if these two were not specified before.
10368 @return: the export information
10371 assert self.op.mode == constants.INSTANCE_IMPORT
10373 src_node = self.op.src_node
10374 src_path = self.op.src_path
10376 if src_node is None:
10377 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10378 exp_list = self.rpc.call_export_list(locked_nodes)
10379 found = False
10380 for node in exp_list:
10381 if exp_list[node].fail_msg:
10382 continue
10383 if src_path in exp_list[node].payload:
10384 found = True
10385 self.op.src_node = src_node = node
10386 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10387 src_path)
10388 break
10389 if not found:
10390 raise errors.OpPrereqError("No export found for relative path %s" %
10391 src_path, errors.ECODE_INVAL)
10393 _CheckNodeOnline(self, src_node)
10394 result = self.rpc.call_export_info(src_node, src_path)
10395 result.Raise("No export or invalid export found in dir %s" % src_path)
10397 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10398 if not export_info.has_section(constants.INISECT_EXP):
10399 raise errors.ProgrammerError("Corrupted export config",
10400 errors.ECODE_ENVIRON)
10402 ei_version = export_info.get(constants.INISECT_EXP, "version")
10403 if int(ei_version) != constants.EXPORT_VERSION:
10404 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10405 (ei_version, constants.EXPORT_VERSION),
10406 errors.ECODE_ENVIRON)
10409 def _ReadExportParams(self, einfo):
10410 """Use export parameters as defaults.
10412 In case the opcode doesn't specify (as in override) some instance
10413 parameters, then try to use them from the export information, if
10414 that declares them.
10417 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10419 if self.op.disk_template is None:
10420 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10421 self.op.disk_template = einfo.get(constants.INISECT_INS,
10422 "disk_template")
10423 if self.op.disk_template not in constants.DISK_TEMPLATES:
10424 raise errors.OpPrereqError("Disk template specified in configuration"
10425 " file is not one of the allowed values:"
10427 " ".join(constants.DISK_TEMPLATES),
10428 errors.ECODE_INVAL)
10430 raise errors.OpPrereqError("No disk template specified and the export"
10431 " is missing the disk_template information",
10432 errors.ECODE_INVAL)
10434 if not self.op.disks:
10435 disks = []
10436 # TODO: import the disk iv_name too
10437 for idx in range(constants.MAX_DISKS):
10438 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10439 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10440 disks.append({constants.IDISK_SIZE: disk_sz})
10441 self.op.disks = disks
10442 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10443 raise errors.OpPrereqError("No disk info specified and the export"
10444 " is missing the disk information",
10445 errors.ECODE_INVAL)
10447 if not self.op.nics:
10448 nics = []
10449 for idx in range(constants.MAX_NICS):
10450 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10451 ndict = {}
10452 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10453 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10454 ndict[name] = v
10455 nics.append(ndict)
10456 else:
10457 break
10458 self.op.nics = nics
10460 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10461 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10463 if (self.op.hypervisor is None and
10464 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10465 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10467 if einfo.has_section(constants.INISECT_HYP):
10468 # use the export parameters but do not override the ones
10469 # specified by the user
10470 for name, value in einfo.items(constants.INISECT_HYP):
10471 if name not in self.op.hvparams:
10472 self.op.hvparams[name] = value
10474 if einfo.has_section(constants.INISECT_BEP):
10475 # use the parameters, without overriding
10476 for name, value in einfo.items(constants.INISECT_BEP):
10477 if name not in self.op.beparams:
10478 self.op.beparams[name] = value
10479 # Compatibility for the old "memory" be param
10480 if name == constants.BE_MEMORY:
10481 if constants.BE_MAXMEM not in self.op.beparams:
10482 self.op.beparams[constants.BE_MAXMEM] = value
10483 if constants.BE_MINMEM not in self.op.beparams:
10484 self.op.beparams[constants.BE_MINMEM] = value
10486 # try to read the parameters old style, from the main section
10487 for name in constants.BES_PARAMETERS:
10488 if (name not in self.op.beparams and
10489 einfo.has_option(constants.INISECT_INS, name)):
10490 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10492 if einfo.has_section(constants.INISECT_OSP):
10493 # use the parameters, without overriding
10494 for name, value in einfo.items(constants.INISECT_OSP):
10495 if name not in self.op.osparams:
10496 self.op.osparams[name] = value
10498 def _RevertToDefaults(self, cluster):
10499 """Revert the instance parameters to the default values.
10503 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10504 for name in self.op.hvparams.keys():
10505 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10506 del self.op.hvparams[name]
10508 be_defs = cluster.SimpleFillBE({})
10509 for name in self.op.beparams.keys():
10510 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10511 del self.op.beparams[name]
10513 nic_defs = cluster.SimpleFillNIC({})
10514 for nic in self.op.nics:
10515 for name in constants.NICS_PARAMETERS:
10516 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10517 del nic[name]
10519 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10520 for name in self.op.osparams.keys():
10521 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10522 del self.op.osparams[name]
10524 def _CalculateFileStorageDir(self):
10525 """Calculate final instance file storage dir.
10528 # file storage dir calculation/check
10529 self.instance_file_storage_dir = None
10530 if self.op.disk_template in constants.DTS_FILEBASED:
10531 # build the full file storage dir path
10532 joinargs = []
10534 if self.op.disk_template == constants.DT_SHARED_FILE:
10535 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10536 else:
10537 get_fsd_fn = self.cfg.GetFileStorageDir
10539 cfg_storagedir = get_fsd_fn()
10540 if not cfg_storagedir:
10541 raise errors.OpPrereqError("Cluster file storage dir not defined",
10542 errors.ECODE_STATE)
10543 joinargs.append(cfg_storagedir)
10545 if self.op.file_storage_dir is not None:
10546 joinargs.append(self.op.file_storage_dir)
10548 joinargs.append(self.op.instance_name)
10550 # pylint: disable=W0142
10551 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10553 def CheckPrereq(self): # pylint: disable=R0914
10554 """Check prerequisites.
10557 self._CalculateFileStorageDir()
10559 if self.op.mode == constants.INSTANCE_IMPORT:
10560 export_info = self._ReadExportInfo()
10561 self._ReadExportParams(export_info)
10562 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10564 self._old_instance_name = None
10566 if (not self.cfg.GetVGName() and
10567 self.op.disk_template not in constants.DTS_NOT_LVM):
10568 raise errors.OpPrereqError("Cluster does not support lvm-based"
10569 " instances", errors.ECODE_STATE)
10571 if (self.op.hypervisor is None or
10572 self.op.hypervisor == constants.VALUE_AUTO):
10573 self.op.hypervisor = self.cfg.GetHypervisorType()
10575 cluster = self.cfg.GetClusterInfo()
10576 enabled_hvs = cluster.enabled_hypervisors
10577 if self.op.hypervisor not in enabled_hvs:
10578 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10580 (self.op.hypervisor, ",".join(enabled_hvs)),
10581 errors.ECODE_STATE)
10583 # Check tag validity
10584 for tag in self.op.tags:
10585 objects.TaggableObject.ValidateTag(tag)
10587 # check hypervisor parameter syntax (locally)
10588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10589 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10591 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10592 hv_type.CheckParameterSyntax(filled_hvp)
10593 self.hv_full = filled_hvp
10594 # check that we don't specify global parameters on an instance
10595 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10596 "instance", "cluster")
10598 # fill and remember the beparams dict
10599 self.be_full = _ComputeFullBeParams(self.op, cluster)
10601 # build os parameters
10602 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10604 # now that hvp/bep are in final format, let's reset to defaults,
10605 # if told to do so
10606 if self.op.identify_defaults:
10607 self._RevertToDefaults(cluster)
10610 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10611 self.proc.GetECId())
10613 # disk checks/pre-build
10614 default_vg = self.cfg.GetVGName()
10615 self.disks = _ComputeDisks(self.op, default_vg)
10617 if self.op.mode == constants.INSTANCE_IMPORT:
10618 disk_images = []
10619 for idx in range(len(self.disks)):
10620 option = "disk%d_dump" % idx
10621 if export_info.has_option(constants.INISECT_INS, option):
10622 # FIXME: are the old os-es, disk sizes, etc. useful?
10623 export_name = export_info.get(constants.INISECT_INS, option)
10624 image = utils.PathJoin(self.op.src_path, export_name)
10625 disk_images.append(image)
10626 else:
10627 disk_images.append(False)
10629 self.src_images = disk_images
10631 if self.op.instance_name == self._old_instance_name:
10632 for idx, nic in enumerate(self.nics):
10633 if nic.mac == constants.VALUE_AUTO:
10634 nic_mac_ini = "nic%d_mac" % idx
10635 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10637 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10639 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10640 if self.op.ip_check:
10641 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10642 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10643 (self.check_ip, self.op.instance_name),
10644 errors.ECODE_NOTUNIQUE)
10646 #### mac address generation
10647 # By generating the MAC address here, both the allocator and the hooks get
10648 # the real final MAC address rather than the 'auto' or 'generate' value.
10649 # There is a race condition between the generation and the instance object
10650 # creation, which means that we know the mac is valid now, but we're not
10651 # sure it will be when we actually add the instance. If things go bad
10652 # adding the instance will abort because of a duplicate mac, and the
10653 # creation job will fail.
10654 for nic in self.nics:
10655 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10656 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10660 if self.op.iallocator is not None:
10661 self._RunAllocator()
10663 # Release all unneeded node locks
10664 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10665 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10666 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10667 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10669 assert (self.owned_locks(locking.LEVEL_NODE) ==
10670 self.owned_locks(locking.LEVEL_NODE_RES)), \
10671 "Node locks differ from node resource locks"
10673 #### node related checks
10675 # check primary node
10676 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10677 assert self.pnode is not None, \
10678 "Cannot retrieve locked node %s" % self.op.pnode
10680 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10681 pnode.name, errors.ECODE_STATE)
10683 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10684 pnode.name, errors.ECODE_STATE)
10685 if not pnode.vm_capable:
10686 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10687 " '%s'" % pnode.name, errors.ECODE_STATE)
10689 self.secondaries = []
10691 # Fill in any IPs from IP pools. This must happen here, because we need to
10692 # know the nic's primary node, as specified by the iallocator
10693 for idx, nic in enumerate(self.nics):
10694 net = nic.network
10695 if net is not None:
10696 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10697 if netparams is None:
10698 raise errors.OpPrereqError("No netparams found for network"
10699 " %s. Propably not connected to"
10700 " node's %s nodegroup" %
10701 (net, self.pnode.name),
10702 errors.ECODE_INVAL)
10703 self.LogInfo("NIC/%d inherits netparams %s" %
10704 (idx, netparams.values()))
10705 nic.nicparams = dict(netparams)
10706 if nic.ip is not None:
10707 if nic.ip.lower() == constants.NIC_IP_POOL:
10708 try:
10709 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10710 except errors.ReservationError:
10711 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10712 " from the address pool" % idx,
10713 errors.ECODE_STATE)
10714 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10715 else:
10716 try:
10717 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10718 except errors.ReservationError:
10719 raise errors.OpPrereqError("IP address %s already in use"
10720 " or does not belong to network %s" %
10721 (nic.ip, net),
10722 errors.ECODE_NOTUNIQUE)
10724 # net is None, ip None or given
10725 elif self.op.conflicts_check:
10726 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10728 # mirror node verification
10729 if self.op.disk_template in constants.DTS_INT_MIRROR:
10730 if self.op.snode == pnode.name:
10731 raise errors.OpPrereqError("The secondary node cannot be the"
10732 " primary node", errors.ECODE_INVAL)
10733 _CheckNodeOnline(self, self.op.snode)
10734 _CheckNodeNotDrained(self, self.op.snode)
10735 _CheckNodeVmCapable(self, self.op.snode)
10736 self.secondaries.append(self.op.snode)
10738 snode = self.cfg.GetNodeInfo(self.op.snode)
10739 if pnode.group != snode.group:
10740 self.LogWarning("The primary and secondary nodes are in two"
10741 " different node groups; the disk parameters"
10742 " from the first disk's node group will be"
10745 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10747 if self.op.disk_template in constants.DTS_INT_MIRROR:
10748 nodes.append(snode)
10749 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10750 if compat.any(map(has_es, nodes)):
10751 raise errors.OpPrereqError("Disk template %s not supported with"
10752 " exclusive storage" % self.op.disk_template,
10753 errors.ECODE_STATE)
10755 nodenames = [pnode.name] + self.secondaries
10757 # Verify instance specs
10758 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10760 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10761 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10762 constants.ISPEC_DISK_COUNT: len(self.disks),
10763 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10764 constants.ISPEC_NIC_COUNT: len(self.nics),
10765 constants.ISPEC_SPINDLE_USE: spindle_use,
10768 group_info = self.cfg.GetNodeGroup(pnode.group)
10769 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10770 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10771 if not self.op.ignore_ipolicy and res:
10772 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10773 (pnode.group, group_info.name, utils.CommaJoin(res)))
10774 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10776 if not self.adopt_disks:
10777 if self.op.disk_template == constants.DT_RBD:
10778 # _CheckRADOSFreeSpace() is just a placeholder.
10779 # Any function that checks prerequisites can be placed here.
10780 # Check if there is enough space on the RADOS cluster.
10781 _CheckRADOSFreeSpace()
10782 elif self.op.disk_template == constants.DT_EXT:
10783 # FIXME: Function that checks prereqs if needed
10784 pass
10785 else:
10786 # Check lv size requirements, if not adopting
10787 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10788 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10790 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10791 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10792 disk[constants.IDISK_ADOPT])
10793 for disk in self.disks])
10794 if len(all_lvs) != len(self.disks):
10795 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10796 errors.ECODE_INVAL)
10797 for lv_name in all_lvs:
10798 try:
10799 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10800 # to ReserveLV uses the same syntax
10801 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10802 except errors.ReservationError:
10803 raise errors.OpPrereqError("LV named %s used by another instance" %
10804 lv_name, errors.ECODE_NOTUNIQUE)
10806 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10807 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10809 node_lvs = self.rpc.call_lv_list([pnode.name],
10810 vg_names.payload.keys())[pnode.name]
10811 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10812 node_lvs = node_lvs.payload
10814 delta = all_lvs.difference(node_lvs.keys())
10816 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10817 utils.CommaJoin(delta),
10818 errors.ECODE_INVAL)
10819 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10821 raise errors.OpPrereqError("Online logical volumes found, cannot"
10822 " adopt: %s" % utils.CommaJoin(online_lvs),
10823 errors.ECODE_STATE)
10824 # update the size of disk based on what is found
10825 for dsk in self.disks:
10826 dsk[constants.IDISK_SIZE] = \
10827 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10828 dsk[constants.IDISK_ADOPT])][0]))
10830 elif self.op.disk_template == constants.DT_BLOCK:
10831 # Normalize and de-duplicate device paths
10832 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10833 for disk in self.disks])
10834 if len(all_disks) != len(self.disks):
10835 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10836 errors.ECODE_INVAL)
10837 baddisks = [d for d in all_disks
10838 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10840 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10841 " cannot be adopted" %
10842 (utils.CommaJoin(baddisks),
10843 constants.ADOPTABLE_BLOCKDEV_ROOT),
10844 errors.ECODE_INVAL)
10846 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10847 list(all_disks))[pnode.name]
10848 node_disks.Raise("Cannot get block device information from node %s" %
10849 pnode.name)
10850 node_disks = node_disks.payload
10851 delta = all_disks.difference(node_disks.keys())
10853 raise errors.OpPrereqError("Missing block device(s): %s" %
10854 utils.CommaJoin(delta),
10855 errors.ECODE_INVAL)
10856 for dsk in self.disks:
10857 dsk[constants.IDISK_SIZE] = \
10858 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10860 # Verify instance specs
10861 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10863 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10864 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10865 constants.ISPEC_DISK_COUNT: len(self.disks),
10866 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10867 for disk in self.disks],
10868 constants.ISPEC_NIC_COUNT: len(self.nics),
10869 constants.ISPEC_SPINDLE_USE: spindle_use,
10872 group_info = self.cfg.GetNodeGroup(pnode.group)
10873 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10874 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10875 if not self.op.ignore_ipolicy and res:
10876 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10877 " policy: %s") % (pnode.group,
10878 utils.CommaJoin(res)),
10879 errors.ECODE_INVAL)
10881 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10883 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10884 # check OS parameters (remotely)
10885 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10887 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10889 #TODO: _CheckExtParams (remotely)
10890 # Check parameters for extstorage
10892 # memory check on primary node
10893 #TODO(dynmem): use MINMEM for checking
10894 if self.op.start:
10895 _CheckNodeFreeMemory(self, self.pnode.name,
10896 "creating instance %s" % self.op.instance_name,
10897 self.be_full[constants.BE_MAXMEM],
10898 self.op.hypervisor)
10900 self.dry_run_result = list(nodenames)
10902 def Exec(self, feedback_fn):
10903 """Create and add the instance to the cluster.
10906 instance = self.op.instance_name
10907 pnode_name = self.pnode.name
10909 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10910 self.owned_locks(locking.LEVEL_NODE)), \
10911 "Node locks differ from node resource locks"
10912 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10914 ht_kind = self.op.hypervisor
10915 if ht_kind in constants.HTS_REQ_PORT:
10916 network_port = self.cfg.AllocatePort()
10917 else:
10918 network_port = None
10920 # This is ugly but we got a chicken-egg problem here
10921 # We can only take the group disk parameters, as the instance
10922 # has no disks yet (we are generating them right here).
10923 node = self.cfg.GetNodeInfo(pnode_name)
10924 nodegroup = self.cfg.GetNodeGroup(node.group)
10925 disks = _GenerateDiskTemplate(self,
10926 self.op.disk_template,
10927 instance, pnode_name,
10928 self.secondaries,
10929 self.disks,
10930 self.instance_file_storage_dir,
10931 self.op.file_driver,
10932 0,
10933 feedback_fn,
10934 self.cfg.GetGroupDiskParams(nodegroup))
10936 iobj = objects.Instance(name=instance, os=self.op.os_type,
10937 primary_node=pnode_name,
10938 nics=self.nics, disks=disks,
10939 disk_template=self.op.disk_template,
10940 admin_state=constants.ADMINST_DOWN,
10941 network_port=network_port,
10942 beparams=self.op.beparams,
10943 hvparams=self.op.hvparams,
10944 hypervisor=self.op.hypervisor,
10945 osparams=self.op.osparams,
10946 )
10949 for tag in self.op.tags:
10950 iobj.AddTag(tag)
10952 if self.adopt_disks:
10953 if self.op.disk_template == constants.DT_PLAIN:
10954 # rename LVs to the newly-generated names; we need to construct
10955 # 'fake' LV disks with the old data, plus the new unique_id
10956 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10957 rename_to = []
10958 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10959 rename_to.append(t_dsk.logical_id)
10960 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10961 self.cfg.SetDiskID(t_dsk, pnode_name)
10962 result = self.rpc.call_blockdev_rename(pnode_name,
10963 zip(tmp_disks, rename_to))
10964 result.Raise("Failed to rename adoped LVs")
10966 feedback_fn("* creating instance disks...")
10968 _CreateDisks(self, iobj)
10969 except errors.OpExecError:
10970 self.LogWarning("Device creation failed, reverting...")
10972 _RemoveDisks(self, iobj)
10974 self.cfg.ReleaseDRBDMinors(instance)
10977 feedback_fn("adding instance %s to cluster config" % instance)
10979 self.cfg.AddInstance(iobj, self.proc.GetECId())
10981 # Declare that we don't want to remove the instance lock anymore, as we've
10982 # added the instance to the config
10983 del self.remove_locks[locking.LEVEL_INSTANCE]
10985 if self.op.mode == constants.INSTANCE_IMPORT:
10986 # Release unused nodes
10987 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10989 # Release all nodes
10990 _ReleaseLocks(self, locking.LEVEL_NODE)
10993 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10994 feedback_fn("* wiping instance disks...")
10996 _WipeDisks(self, iobj)
10997 except errors.OpExecError, err:
10998 logging.exception("Wiping disks failed")
10999 self.LogWarning("Wiping instance disks failed (%s)", err)
11003 # Something is already wrong with the disks, don't do anything else
11005 elif self.op.wait_for_sync:
11006 disk_abort = not _WaitForSync(self, iobj)
11007 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11008 # make sure the disks are not degraded (still sync-ing is ok)
11009 feedback_fn("* checking mirrors status")
11010 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11015 _RemoveDisks(self, iobj)
11016 self.cfg.RemoveInstance(iobj.name)
11017 # Make sure the instance lock gets removed
11018 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11019 raise errors.OpExecError("There are some degraded disks for"
11022 # Release all node resource locks
11023 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11025 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11026 # we need to set the disks ID to the primary node, since the
11027 # preceding code might or might not have done it, depending on
11028 # disk template and other options
11029 for disk in iobj.disks:
11030 self.cfg.SetDiskID(disk, pnode_name)
11031 if self.op.mode == constants.INSTANCE_CREATE:
11032 if not self.op.no_install:
11033 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11034 not self.op.wait_for_sync)
11036 feedback_fn("* pausing disk sync to install instance OS")
11037 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11040 for idx, success in enumerate(result.payload):
11042 logging.warn("pause-sync of instance %s for disk %d failed",
11045 feedback_fn("* running the instance OS create scripts...")
11046 # FIXME: pass debug option from opcode to backend
11048 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11049 self.op.debug_level)
11051 feedback_fn("* resuming disk sync")
11052 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11055 for idx, success in enumerate(result.payload):
11057 logging.warn("resume-sync of instance %s for disk %d failed",
11060 os_add_result.Raise("Could not add os for instance %s"
11061 " on node %s" % (instance, pnode_name))
11064 if self.op.mode == constants.INSTANCE_IMPORT:
11065 feedback_fn("* running the instance OS import scripts...")
11069 for idx, image in enumerate(self.src_images):
11073 # FIXME: pass debug option from opcode to backend
11074 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11075 constants.IEIO_FILE, (image, ),
11076 constants.IEIO_SCRIPT,
11077 (iobj.disks[idx], idx),
11079 transfers.append(dt)
11082 masterd.instance.TransferInstanceData(self, feedback_fn,
11083 self.op.src_node, pnode_name,
11084 self.pnode.secondary_ip,
11086 if not compat.all(import_result):
11087 self.LogWarning("Some disks for instance %s on node %s were not"
11088 " imported successfully" % (instance, pnode_name))
11090 rename_from = self._old_instance_name
11092 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11093 feedback_fn("* preparing remote import...")
11094 # The source cluster will stop the instance before attempting to make
11095 # a connection. In some cases stopping an instance can take a long
11096 # time, hence the shutdown timeout is added to the connection timeout
11098 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11099 self.op.source_shutdown_timeout)
11100 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11102 assert iobj.primary_node == self.pnode.name
11104 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11105 self.source_x509_ca,
11106 self._cds, timeouts)
11107 if not compat.all(disk_results):
11108 # TODO: Should the instance still be started, even if some disks
11109 # failed to import (valid for local imports, too)?
11110 self.LogWarning("Some disks for instance %s on node %s were not"
11111 " imported successfully" % (instance, pnode_name))
11113 rename_from = self.source_instance_name
11116 # also checked in the prereq part
11117 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11120 # Run rename script on newly imported instance
11121 assert iobj.name == instance
11122 feedback_fn("Running rename script for %s" % instance)
11123 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11125 self.op.debug_level)
11126 if result.fail_msg:
11127 self.LogWarning("Failed to run rename script for %s on node"
11128 " %s: %s" % (instance, pnode_name, result.fail_msg))
11130 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11133 iobj.admin_state = constants.ADMINST_UP
11134 self.cfg.Update(iobj, feedback_fn)
11135 logging.info("Starting instance %s on node %s", instance, pnode_name)
11136 feedback_fn("* starting instance...")
11137 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11139 result.Raise("Could not start instance")
11141 return list(iobj.all_nodes)
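# A minimal sketch of the pause/resume pattern used above while installing
# the OS; it assumes an RPC runner exposing call_blockdev_pause_resume_sync
# with a (node, disks, pause) signature and a per-disk boolean payload, as
# the calls above suggest (illustrative helper, not used by this module):
def _ExamplePauseResumeSync(rpc_runner, node_name, disks, pause):
  """Toggles disk synchronization, logging per-disk failures."""
  result = rpc_runner.call_blockdev_pause_resume_sync(node_name, disks, pause)
  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("%s-sync of disk %d on node %s failed",
                   "pause" if pause else "resume", idx, node_name)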
11144 class LUInstanceMultiAlloc(NoHooksLU):
11145 """Allocates multiple instances at the same time.
11150 def CheckArguments(self):
11151 """Check arguments.
11155 for inst in self.op.instances:
11156 if inst.iallocator is not None:
11157 raise errors.OpPrereqError("iallocator must not be set on"
11158 " instance objects", errors.ECODE_INVAL)
11159 nodes.append(bool(inst.pnode))
11160 if inst.disk_template in constants.DTS_INT_MIRROR:
11161 nodes.append(bool(inst.snode))
11163 has_nodes = compat.any(nodes)
11164 if compat.all(nodes) ^ has_nodes:
11165 raise errors.OpPrereqError("Some instance objects provide"
11166 " pnode/snode while others do not",
11167 errors.ECODE_INVAL)
11169 if self.op.iallocator is None:
11170 default_iallocator = self.cfg.GetDefaultIAllocator()
11171 if default_iallocator and has_nodes:
11172 self.op.iallocator = default_iallocator
11174 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11175 " given and no cluster-wide default"
11176 " iallocator found; please specify either"
11177 " an iallocator or nodes on the instances"
11178 " or set a cluster-wide default iallocator",
11179 errors.ECODE_INVAL)
11181 _CheckOpportunisticLocking(self.op)
11183 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11185 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11186 utils.CommaJoin(dups), errors.ECODE_INVAL)
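# utils.FindDuplicates, used above, returns the values occurring more than
# once in a sequence; a simplified, illustrative equivalent (the real
# helper lives in ganeti.utils):
def _ExampleFindDuplicates(seq):
  """Returns the duplicated elements of seq (order not preserved)."""
  seen = set()
  duplicates = set()
  for item in seq:
    if item in seen:
      duplicates.add(item)
    seen.add(item)
  return list(duplicates)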
11188 def ExpandNames(self):
11189 """Calculate the locks.
11192 self.share_locks = _ShareAll()
11193 self.needed_locks = {
11194 # iallocator will select nodes and even if no iallocator is used,
11195 # collisions with LUInstanceCreate should be avoided
11196 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11199 if self.op.iallocator:
11200 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11201 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11203 if self.op.opportunistic_locking:
11204 self.opportunistic_locks[locking.LEVEL_NODE] = True
11205 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11208 for inst in self.op.instances:
11209 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11210 nodeslist.append(inst.pnode)
11211 if inst.snode is not None:
11212 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11213 nodeslist.append(inst.snode)
11215 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11216 # Lock resources of instance's primary and secondary nodes (copy to
11217 # prevent accidental modification)
11218 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11220 def CheckPrereq(self):
11221 """Check prerequisite.
11224 cluster = self.cfg.GetClusterInfo()
11225 default_vg = self.cfg.GetVGName()
11226 ec_id = self.proc.GetECId()
11228 if self.op.opportunistic_locking:
11229 # Only consider nodes for which a lock is held
11230 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11232 node_whitelist = None
11234 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11235 _ComputeNics(op, cluster, None,
11237 _ComputeFullBeParams(op, cluster),
11239 for op in self.op.instances]
11241 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11242 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11244 ial.Run(self.op.iallocator)
11246 if not ial.success:
11247 raise errors.OpPrereqError("Can't compute nodes using"
11248 " iallocator '%s': %s" %
11249 (self.op.iallocator, ial.info),
11250 errors.ECODE_NORES)
11252 self.ia_result = ial.result
11254 if self.op.dry_run:
11255 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11256 constants.JOB_IDS_KEY: [],
11259 def _ConstructPartialResult(self):
11260 """Contructs the partial result.
11263 (allocatable, failed) = self.ia_result
11265 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11266 map(compat.fst, allocatable),
11267 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11270 def Exec(self, feedback_fn):
11271 """Executes the opcode.
11274 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11275 (allocatable, failed) = self.ia_result
11278 for (name, nodes) in allocatable:
11279 op = op2inst.pop(name)
11282 (op.pnode, op.snode) = nodes
11284 (op.pnode,) = nodes
11288 missing = set(op2inst.keys()) - set(failed)
11289 assert not missing, \
11290 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11292 return ResultWithJobs(jobs, **self._ConstructPartialResult())
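# The opcode result thus combines the submitted job IDs with the partial
# allocation result; schematically (key names are illustrative, the real
# ones come from the constants used above):
#   {JOB_IDS_KEY: [...], ALLOCATABLE_KEY: ["inst1", ...], FAILED_KEY: [...]}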
11295 def _CheckRADOSFreeSpace():
11296 """Compute disk size requirements inside the RADOS cluster.
11299 # For the RADOS cluster we assume there is always enough space.
11303 class LUInstanceConsole(NoHooksLU):
11304 """Connect to an instance's console.
11306 This is somewhat special in that it returns the command line that
11307 you need to run on the master node in order to connect to the console.
11313 def ExpandNames(self):
11314 self.share_locks = _ShareAll()
11315 self._ExpandAndLockInstance()
11317 def CheckPrereq(self):
11318 """Check prerequisites.
11320 This checks that the instance is in the cluster.
11323 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11324 assert self.instance is not None, \
11325 "Cannot retrieve locked instance %s" % self.op.instance_name
11326 _CheckNodeOnline(self, self.instance.primary_node)
11328 def Exec(self, feedback_fn):
11329 """Connect to the console of an instance
11332 instance = self.instance
11333 node = instance.primary_node
11335 node_insts = self.rpc.call_instance_list([node],
11336 [instance.hypervisor])[node]
11337 node_insts.Raise("Can't get node information from %s" % node)
11339 if instance.name not in node_insts.payload:
11340 if instance.admin_state == constants.ADMINST_UP:
11341 state = constants.INSTST_ERRORDOWN
11342 elif instance.admin_state == constants.ADMINST_DOWN:
11343 state = constants.INSTST_ADMINDOWN
11345 state = constants.INSTST_ADMINOFFLINE
11346 raise errors.OpExecError("Instance %s is not running (state %s)" %
11347 (instance.name, state))
11349 logging.debug("Connecting to console of %s on %s", instance.name, node)
11351 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11354 def _GetInstanceConsole(cluster, instance):
11355 """Returns console information for an instance.
11357 @type cluster: L{objects.Cluster}
11358 @type instance: L{objects.Instance}
11362 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11363 # beparams and hvparams are passed separately, to avoid editing the
11364 # instance and then saving the defaults in the instance itself.
11365 hvparams = cluster.FillHV(instance)
11366 beparams = cluster.FillBE(instance)
11367 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11369 assert console.instance == instance.name
11370 assert console.Validate()
11372 return console.ToDict()
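# Illustrative usage from within an LU's Exec (instance lock held):
#   cluster = self.cfg.GetClusterInfo()
#   console_dict = _GetInstanceConsole(cluster, self.instance)
# The returned dictionary is the serialized console object validated above.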
11375 class LUInstanceReplaceDisks(LogicalUnit):
11376 """Replace the disks of an instance.
11379 HPATH = "mirrors-replace"
11380 HTYPE = constants.HTYPE_INSTANCE
11383 def CheckArguments(self):
11384 """Check arguments.
11387 remote_node = self.op.remote_node
11388 ialloc = self.op.iallocator
11389 if self.op.mode == constants.REPLACE_DISK_CHG:
11390 if remote_node is None and ialloc is None:
11391 raise errors.OpPrereqError("When changing the secondary either an"
11392 " iallocator script must be used or the"
11393 " new node given", errors.ECODE_INVAL)
11395 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11397 elif remote_node is not None or ialloc is not None:
11398 # Not replacing the secondary
11399 raise errors.OpPrereqError("The iallocator and new node options can"
11400 " only be used when changing the"
11401 " secondary node", errors.ECODE_INVAL)
11403 def ExpandNames(self):
11404 self._ExpandAndLockInstance()
11406 assert locking.LEVEL_NODE not in self.needed_locks
11407 assert locking.LEVEL_NODE_RES not in self.needed_locks
11408 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11410 assert self.op.iallocator is None or self.op.remote_node is None, \
11411 "Conflicting options"
11413 if self.op.remote_node is not None:
11414 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11416 # Warning: do not remove the locking of the new secondary here
11417 # unless DRBD8.AddChildren is changed to work in parallel;
11418 # currently it doesn't since parallel invocations of
11419 # FindUnusedMinor will conflict
11420 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11421 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11423 self.needed_locks[locking.LEVEL_NODE] = []
11424 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11426 if self.op.iallocator is not None:
11427 # iallocator will select a new node in the same group
11428 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11429 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11431 self.needed_locks[locking.LEVEL_NODE_RES] = []
11433 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11434 self.op.iallocator, self.op.remote_node,
11435 self.op.disks, self.op.early_release,
11436 self.op.ignore_ipolicy)
11438 self.tasklets = [self.replacer]
11440 def DeclareLocks(self, level):
11441 if level == locking.LEVEL_NODEGROUP:
11442 assert self.op.remote_node is None
11443 assert self.op.iallocator is not None
11444 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11446 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11447 # Lock all groups used by instance optimistically; this requires going
11448 # via the node before it's locked, requiring verification later on
11449 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11450 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11452 elif level == locking.LEVEL_NODE:
11453 if self.op.iallocator is not None:
11454 assert self.op.remote_node is None
11455 assert not self.needed_locks[locking.LEVEL_NODE]
11456 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11458 # Lock member nodes of all locked groups
11459 self.needed_locks[locking.LEVEL_NODE] = \
11461 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11462 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11464 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11466 self._LockInstancesNodes()
11468 elif level == locking.LEVEL_NODE_RES:
11470 self.needed_locks[locking.LEVEL_NODE_RES] = \
11471 self.needed_locks[locking.LEVEL_NODE]
11473 def BuildHooksEnv(self):
11474 """Build hooks env.
11476 This runs on the master, the primary and all the secondaries.
11479 instance = self.replacer.instance
11481 "MODE": self.op.mode,
11482 "NEW_SECONDARY": self.op.remote_node,
11483 "OLD_SECONDARY": instance.secondary_nodes[0],
11485 env.update(_BuildInstanceHookEnvByObject(self, instance))
11488 def BuildHooksNodes(self):
11489 """Build hooks nodes.
11492 instance = self.replacer.instance
11494 self.cfg.GetMasterNode(),
11495 instance.primary_node,
11497 if self.op.remote_node is not None:
11498 nl.append(self.op.remote_node)
11501 def CheckPrereq(self):
11502 """Check prerequisites.
11505 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11506 self.op.iallocator is None)
11508 # Verify if node group locks are still correct
11509 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11511 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11513 return LogicalUnit.CheckPrereq(self)
11516 class TLReplaceDisks(Tasklet):
11517 """Replaces disks for an instance.
11519 Note: Locking is not within the scope of this class.
11522 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11523 disks, early_release, ignore_ipolicy):
11524 """Initializes this class.
11527 Tasklet.__init__(self, lu)
11530 self.instance_name = instance_name
11532 self.iallocator_name = iallocator_name
11533 self.remote_node = remote_node
11535 self.early_release = early_release
11536 self.ignore_ipolicy = ignore_ipolicy
11539 self.instance = None
11540 self.new_node = None
11541 self.target_node = None
11542 self.other_node = None
11543 self.remote_node_info = None
11544 self.node_secondary_ip = None
11547 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11548 """Compute a new secondary node using an IAllocator.
11551 req = iallocator.IAReqRelocate(name=instance_name,
11552 relocate_from=list(relocate_from))
11553 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11555 ial.Run(iallocator_name)
11557 if not ial.success:
11558 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11559 " %s" % (iallocator_name, ial.info),
11560 errors.ECODE_NORES)
11562 remote_node_name = ial.result[0]
11564 lu.LogInfo("Selected new secondary for instance '%s': %s",
11565 instance_name, remote_node_name)
11567 return remote_node_name
11569 def _FindFaultyDisks(self, node_name):
11570 """Wrapper for L{_FindFaultyInstanceDisks}.
11573 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11576 def _CheckDisksActivated(self, instance):
11577 """Checks if the instance disks are activated.
11579 @param instance: The instance to check disks
11580 @return: True if they are activated, False otherwise
11583 nodes = instance.all_nodes
11585 for idx, dev in enumerate(instance.disks):
11587 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11588 self.cfg.SetDiskID(dev, node)
11590 result = _BlockdevFind(self, node, dev, instance)
11594 elif result.fail_msg or not result.payload:
11599 def CheckPrereq(self):
11600 """Check prerequisites.
11602 This checks that the instance is in the cluster.
11605 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11606 assert instance is not None, \
11607 "Cannot retrieve locked instance %s" % self.instance_name
11609 if instance.disk_template != constants.DT_DRBD8:
11610 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11611 " instances", errors.ECODE_INVAL)
11613 if len(instance.secondary_nodes) != 1:
11614 raise errors.OpPrereqError("The instance has a strange layout,"
11615 " expected one secondary but found %d" %
11616 len(instance.secondary_nodes),
11617 errors.ECODE_FAULT)
11619 instance = self.instance
11620 secondary_node = instance.secondary_nodes[0]
11622 if self.iallocator_name is None:
11623 remote_node = self.remote_node
11625 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11626 instance.name, instance.secondary_nodes)
11628 if remote_node is None:
11629 self.remote_node_info = None
11631 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11632 "Remote node '%s' is not locked" % remote_node
11634 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11635 assert self.remote_node_info is not None, \
11636 "Cannot retrieve locked node %s" % remote_node
11638 if remote_node == self.instance.primary_node:
11639 raise errors.OpPrereqError("The specified node is the primary node of"
11640 " the instance", errors.ECODE_INVAL)
11642 if remote_node == secondary_node:
11643 raise errors.OpPrereqError("The specified node is already the"
11644 " secondary node of the instance",
11645 errors.ECODE_INVAL)
11647 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11648 constants.REPLACE_DISK_CHG):
11649 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11650 errors.ECODE_INVAL)
11652 if self.mode == constants.REPLACE_DISK_AUTO:
11653 if not self._CheckDisksActivated(instance):
11654 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11655 " first" % self.instance_name,
11656 errors.ECODE_STATE)
11657 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11658 faulty_secondary = self._FindFaultyDisks(secondary_node)
11660 if faulty_primary and faulty_secondary:
11661 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11662 " one node and can not be repaired"
11663 " automatically" % self.instance_name,
11664 errors.ECODE_STATE)
11667 self.disks = faulty_primary
11668 self.target_node = instance.primary_node
11669 self.other_node = secondary_node
11670 check_nodes = [self.target_node, self.other_node]
11671 elif faulty_secondary:
11672 self.disks = faulty_secondary
11673 self.target_node = secondary_node
11674 self.other_node = instance.primary_node
11675 check_nodes = [self.target_node, self.other_node]
11681 # Non-automatic modes
11682 if self.mode == constants.REPLACE_DISK_PRI:
11683 self.target_node = instance.primary_node
11684 self.other_node = secondary_node
11685 check_nodes = [self.target_node, self.other_node]
11687 elif self.mode == constants.REPLACE_DISK_SEC:
11688 self.target_node = secondary_node
11689 self.other_node = instance.primary_node
11690 check_nodes = [self.target_node, self.other_node]
11692 elif self.mode == constants.REPLACE_DISK_CHG:
11693 self.new_node = remote_node
11694 self.other_node = instance.primary_node
11695 self.target_node = secondary_node
11696 check_nodes = [self.new_node, self.other_node]
11698 _CheckNodeNotDrained(self.lu, remote_node)
11699 _CheckNodeVmCapable(self.lu, remote_node)
11701 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11702 assert old_node_info is not None
11703 if old_node_info.offline and not self.early_release:
11704 # doesn't make sense to delay the release
11705 self.early_release = True
11706 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11707 " early-release mode", secondary_node)
11710 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11713 # If not specified all disks should be replaced
11715 self.disks = range(len(self.instance.disks))
11717 # TODO: This is ugly, but right now we can't distinguish between internally
11718 # submitted opcodes and external ones. We should fix that.
11719 if self.remote_node_info:
11720 # We change the node; let's verify it still meets the instance policy
11721 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11722 cluster = self.cfg.GetClusterInfo()
11723 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11725 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11726 ignore=self.ignore_ipolicy)
11728 for node in check_nodes:
11729 _CheckNodeOnline(self.lu, node)
11731 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11734 if node_name is not None)
11736 # Release unneeded node and node resource locks
11737 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11738 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11739 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11741 # Release any owned node group
11742 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11744 # Check whether disks are valid
11745 for disk_idx in self.disks:
11746 instance.FindDisk(disk_idx)
11748 # Get secondary node IP addresses
11749 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11750 in self.cfg.GetMultiNodeInfo(touched_nodes))
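# The resulting mapping has the form (illustrative values):
#   {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"}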
11752 def Exec(self, feedback_fn):
11753 """Execute disk replacement.
11755 This dispatches the disk replacement to the appropriate handler.
11759 # Verify owned locks before starting operation
11760 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11761 assert set(owned_nodes) == set(self.node_secondary_ip), \
11762 ("Incorrect node locks, owning %s, expected %s" %
11763 (owned_nodes, self.node_secondary_ip.keys()))
11764 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11765 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11766 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11768 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11769 assert list(owned_instances) == [self.instance_name], \
11770 "Instance '%s' not locked" % self.instance_name
11772 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11773 "Should not own any node group lock at this point"
11776 feedback_fn("No disks need replacement for instance '%s'" %
11777 self.instance.name)
11780 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11781 (utils.CommaJoin(self.disks), self.instance.name))
11782 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11783 feedback_fn("Current secondary node: %s" %
11784 utils.CommaJoin(self.instance.secondary_nodes))
11786 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11788 # Activate the instance disks if we're replacing them on a down instance
11790 _StartInstanceDisks(self.lu, self.instance, True)
11793 # Should we replace the secondary node?
11794 if self.new_node is not None:
11795 fn = self._ExecDrbd8Secondary
11797 fn = self._ExecDrbd8DiskOnly
11799 result = fn(feedback_fn)
11801 # Deactivate the instance disks if we're replacing them on a down instance
11804 _SafeShutdownInstanceDisks(self.lu, self.instance)
11806 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11809 # Verify owned locks
11810 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11811 nodes = frozenset(self.node_secondary_ip)
11812 assert ((self.early_release and not owned_nodes) or
11813 (not self.early_release and not (set(owned_nodes) - nodes))), \
11814 ("Not owning the correct locks, early_release=%s, owned=%r,"
11815 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11819 def _CheckVolumeGroup(self, nodes):
11820 self.lu.LogInfo("Checking volume groups")
11822 vgname = self.cfg.GetVGName()
11824 # Make sure volume group exists on all involved nodes
11825 results = self.rpc.call_vg_list(nodes)
11827 raise errors.OpExecError("Can't list volume groups on the nodes")
11830 res = results[node]
11831 res.Raise("Error checking node %s" % node)
11832 if vgname not in res.payload:
11833 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11836 def _CheckDisksExistence(self, nodes):
11837 # Check disk existence
11838 for idx, dev in enumerate(self.instance.disks):
11839 if idx not in self.disks:
11843 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11844 self.cfg.SetDiskID(dev, node)
11846 result = _BlockdevFind(self, node, dev, self.instance)
11848 msg = result.fail_msg
11849 if msg or not result.payload:
11851 msg = "disk not found"
11852 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11855 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11856 for idx, dev in enumerate(self.instance.disks):
11857 if idx not in self.disks:
11860 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11863 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11864 on_primary, ldisk=ldisk):
11865 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11866 " replace disks for instance %s" %
11867 (node_name, self.instance.name))
11869 def _CreateNewStorage(self, node_name):
11870 """Create new storage on the primary or secondary node.
11872 This is only used for same-node replaces, not for changing the
11873 secondary node, hence we don't want to modify the existing disk.
11878 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11879 for idx, dev in enumerate(disks):
11880 if idx not in self.disks:
11883 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11885 self.cfg.SetDiskID(dev, node_name)
11887 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11888 names = _GenerateUniqueNames(self.lu, lv_names)
11890 (data_disk, meta_disk) = dev.children
11891 vg_data = data_disk.logical_id[0]
11892 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11893 logical_id=(vg_data, names[0]),
11894 params=data_disk.params)
11895 vg_meta = meta_disk.logical_id[0]
11896 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11897 size=constants.DRBD_META_SIZE,
11898 logical_id=(vg_meta, names[1]),
11899 params=meta_disk.params)
11901 new_lvs = [lv_data, lv_meta]
11902 old_lvs = [child.Copy() for child in dev.children]
11903 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11904 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11906 # we pass force_create=True to force the LVM creation
11907 for new_lv in new_lvs:
11908 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11909 _GetInstanceInfoText(self.instance), False,
11914 def _CheckDevices(self, node_name, iv_names):
11915 for name, (dev, _, _) in iv_names.iteritems():
11916 self.cfg.SetDiskID(dev, node_name)
11918 result = _BlockdevFind(self, node_name, dev, self.instance)
11920 msg = result.fail_msg
11921 if msg or not result.payload:
11923 msg = "disk not found"
11924 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11927 if result.payload.is_degraded:
11928 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11930 def _RemoveOldStorage(self, node_name, iv_names):
11931 for name, (_, old_lvs, _) in iv_names.iteritems():
11932 self.lu.LogInfo("Remove logical volumes for %s", name)
11935 self.cfg.SetDiskID(lv, node_name)
11937 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11939 self.lu.LogWarning("Can't remove old LV: %s", msg,
11940 hint="remove unused LVs manually")
11942 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11943 """Replace a disk on the primary or secondary for DRBD 8.
11945 The algorithm for replace is quite complicated:
11947 1. for each disk to be replaced:
11949 1. create new LVs on the target node with unique names
11950 1. detach old LVs from the drbd device
11951 1. rename old LVs to name_replaced.<time_t>
11952 1. rename new LVs to old LVs
11953 1. attach the new LVs (with the old names now) to the drbd device
11955 1. wait for sync across all devices
11957 1. for each modified disk:
11959 1. remove old LVs (which have the name name_replaced.<time_t>)
11961 Failures are not very well handled.
11966 # Step: check device activation
11967 self.lu.LogStep(1, steps_total, "Check device existence")
11968 self._CheckDisksExistence([self.other_node, self.target_node])
11969 self._CheckVolumeGroup([self.target_node, self.other_node])
11971 # Step: check other node consistency
11972 self.lu.LogStep(2, steps_total, "Check peer consistency")
11973 self._CheckDisksConsistency(self.other_node,
11974 self.other_node == self.instance.primary_node,
11977 # Step: create new storage
11978 self.lu.LogStep(3, steps_total, "Allocate new storage")
11979 iv_names = self._CreateNewStorage(self.target_node)
11981 # Step: for each lv, detach+rename*2+attach
11982 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11983 for dev, old_lvs, new_lvs in iv_names.itervalues():
11984 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11986 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11988 result.Raise("Can't detach drbd from local storage on node"
11989 " %s for device %s" % (self.target_node, dev.iv_name))
11991 #cfg.Update(instance)
11993 # ok, we created the new LVs, so now we know we have the needed
11994 # storage; as such, we proceed on the target node to rename
11995 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11996 # using the assumption that logical_id == physical_id (which in
11997 # turn is the unique_id on that node)
11999 # FIXME(iustin): use a better name for the replaced LVs
12000 temp_suffix = int(time.time())
12001 ren_fn = lambda d, suff: (d.physical_id[0],
12002 d.physical_id[1] + "_replaced-%s" % suff)
12004 # Build the rename list based on what LVs exist on the node
12005 rename_old_to_new = []
12006 for to_ren in old_lvs:
12007 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12008 if not result.fail_msg and result.payload:
12010 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12012 self.lu.LogInfo("Renaming the old LVs on the target node")
12013 result = self.rpc.call_blockdev_rename(self.target_node,
12015 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12017 # Now we rename the new LVs to the old LVs
12018 self.lu.LogInfo("Renaming the new LVs on the target node")
12019 rename_new_to_old = [(new, old.physical_id)
12020 for old, new in zip(old_lvs, new_lvs)]
12021 result = self.rpc.call_blockdev_rename(self.target_node,
12023 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12025 # Intermediate steps of in-memory modifications
12026 for old, new in zip(old_lvs, new_lvs):
12027 new.logical_id = old.logical_id
12028 self.cfg.SetDiskID(new, self.target_node)
12030 # We need to modify old_lvs so that removal later removes the
12031 # right LVs, not the newly added ones; note that old_lvs is a copy
12033 for disk in old_lvs:
12034 disk.logical_id = ren_fn(disk, temp_suffix)
12035 self.cfg.SetDiskID(disk, self.target_node)
12037 # Now that the new lvs have the old name, we can add them to the device
12038 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12039 result = self.rpc.call_blockdev_addchildren(self.target_node,
12040 (dev, self.instance), new_lvs)
12041 msg = result.fail_msg
12043 for new_lv in new_lvs:
12044 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12047 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12048 hint=("cleanup manually the unused logical"
12050 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12052 cstep = itertools.count(5)
12054 if self.early_release:
12055 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12056 self._RemoveOldStorage(self.target_node, iv_names)
12057 # TODO: Check if releasing locks early still makes sense
12058 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12060 # Release all resource locks except those used by the instance
12061 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12062 keep=self.node_secondary_ip.keys())
12064 # Release all node locks while waiting for sync
12065 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12067 # TODO: Can the instance lock be downgraded here? Take the optional disk
12068 # shutdown in the caller into consideration.
12071 # This can fail as the old devices are degraded and _WaitForSync
12072 # returns a combined result over all disks, so we don't check its return value
12073 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12074 _WaitForSync(self.lu, self.instance)
12076 # Check all devices manually
12077 self._CheckDevices(self.instance.primary_node, iv_names)
12079 # Step: remove old storage
12080 if not self.early_release:
12081 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12082 self._RemoveOldStorage(self.target_node, iv_names)
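# The detach/rename/rename/attach sequence above can be summarised by the
# following sketch (hypothetical helper; disk objects, configuration
# updates and rollback are omitted for brevity):
def _ExampleSwapLvNames(old_names, new_names, suffix):
  """Returns the two rename maps used to swap old and new LVs."""
  # first move the old LVs out of the way ...
  rename_old_to_new = [(old, "%s_replaced-%s" % (old, suffix))
                       for old in old_names]
  # ... then give the new LVs the original names
  rename_new_to_old = zip(new_names, old_names)
  return (rename_old_to_new, rename_new_to_old)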
12084 def _ExecDrbd8Secondary(self, feedback_fn):
12085 """Replace the secondary node for DRBD 8.
12087 The algorithm for replace is quite complicated:
12088 - for all disks of the instance:
12089 - create new LVs on the new node with same names
12090 - shutdown the drbd device on the old secondary
12091 - disconnect the drbd network on the primary
12092 - create the drbd device on the new secondary
12093 - network attach the drbd on the primary, using an artifice:
12094 the drbd code for Attach() will connect to the network if it
12095 finds a device which is connected to the good local disks but
12096 not network enabled
12097 - wait for sync across all devices
12098 - remove all disks from the old secondary
12100 Failures are not very well handled.
12105 pnode = self.instance.primary_node
12107 # Step: check device activation
12108 self.lu.LogStep(1, steps_total, "Check device existence")
12109 self._CheckDisksExistence([self.instance.primary_node])
12110 self._CheckVolumeGroup([self.instance.primary_node])
12112 # Step: check other node consistency
12113 self.lu.LogStep(2, steps_total, "Check peer consistency")
12114 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12116 # Step: create new storage
12117 self.lu.LogStep(3, steps_total, "Allocate new storage")
12118 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12119 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12120 for idx, dev in enumerate(disks):
12121 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12122 (self.new_node, idx))
12123 # we pass force_create=True to force LVM creation
12124 for new_lv in dev.children:
12125 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12126 True, _GetInstanceInfoText(self.instance), False,
12129 # Step 4: drbd minors and drbd setup changes
12130 # after this, we must manually remove the drbd minors on both the
12131 # error and the success paths
12132 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12133 minors = self.cfg.AllocateDRBDMinor([self.new_node
12134 for dev in self.instance.disks],
12135 self.instance.name)
12136 logging.debug("Allocated minors %r", minors)
12139 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12140 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12141 (self.new_node, idx))
12142 # create new devices on new_node; note that we create two IDs:
12143 # one without port, so the drbd will be activated without
12144 # networking information on the new node at this stage, and one
12145 # with network, for the later activation in step 4
12146 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12147 if self.instance.primary_node == o_node1:
12150 assert self.instance.primary_node == o_node2, "Three-node instance?"
12153 new_alone_id = (self.instance.primary_node, self.new_node, None,
12154 p_minor, new_minor, o_secret)
12155 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12156 p_minor, new_minor, o_secret)
12158 iv_names[idx] = (dev, dev.children, new_net_id)
12159 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12161 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12162 logical_id=new_alone_id,
12163 children=dev.children,
12166 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12169 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12171 _GetInstanceInfoText(self.instance), False,
12173 except errors.GenericError:
12174 self.cfg.ReleaseDRBDMinors(self.instance.name)
12177 # We have the new devices; shut down the drbd on the old secondary
12178 for idx, dev in enumerate(self.instance.disks):
12179 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12180 self.cfg.SetDiskID(dev, self.target_node)
12181 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12182 (dev, self.instance)).fail_msg
12184 self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
12185 " node: %s" % (idx, msg),
12186 hint=("Please cleanup this device manually as"
12187 " soon as possible"))
12189 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12190 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12191 self.instance.disks)[pnode]
12193 msg = result.fail_msg
12195 # detaches didn't succeed (unlikely)
12196 self.cfg.ReleaseDRBDMinors(self.instance.name)
12197 raise errors.OpExecError("Can't detach the disks from the network on"
12198 " old node: %s" % (msg,))
12200 # if we managed to detach at least one, we update all the disks of
12201 # the instance to point to the new secondary
12202 self.lu.LogInfo("Updating instance configuration")
12203 for dev, _, new_logical_id in iv_names.itervalues():
12204 dev.logical_id = new_logical_id
12205 self.cfg.SetDiskID(dev, self.instance.primary_node)
12207 self.cfg.Update(self.instance, feedback_fn)
12209 # Release all node locks (the configuration has been updated)
12210 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12212 # and now perform the drbd attach
12213 self.lu.LogInfo("Attaching primary drbds to new secondary"
12214 " (standalone => connected)")
12215 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12217 self.node_secondary_ip,
12218 (self.instance.disks, self.instance),
12219 self.instance.name,
12221 for to_node, to_result in result.items():
12222 msg = to_result.fail_msg
12224 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12226 hint=("please run 'gnt-instance info' to see the"
12227 " status of the disks"))
12229 cstep = itertools.count(5)
12231 if self.early_release:
12232 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12233 self._RemoveOldStorage(self.target_node, iv_names)
12234 # TODO: Check if releasing locks early still makes sense
12235 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12237 # Release all resource locks except those used by the instance
12238 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12239 keep=self.node_secondary_ip.keys())
12241 # TODO: Can the instance lock be downgraded here? Take the optional disk
12242 # shutdown in the caller into consideration.
12245 # This can fail as the old devices are degraded and _WaitForSync
12246 # returns a combined result over all disks, so we don't check its return value
12247 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12248 _WaitForSync(self.lu, self.instance)
12250 # Check all devices manually
12251 self._CheckDevices(self.instance.primary_node, iv_names)
12253 # Step: remove old storage
12254 if not self.early_release:
12255 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12256 self._RemoveOldStorage(self.target_node, iv_names)
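# For reference, a DRBD8 logical_id is the 6-tuple
# (nodeA, nodeB, port, minorA, minorB, secret); the "alone" variant built
# in step 4 above carries None instead of the port (illustrative values):
#   ("node1", "node3", None, 0, 4, "secret")   # standalone
#   ("node1", "node3", 11000, 0, 4, "secret")  # network-enabled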
12259 class LURepairNodeStorage(NoHooksLU):
12260 """Repairs the volume group on a node.
12265 def CheckArguments(self):
12266 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12268 storage_type = self.op.storage_type
12270 if (constants.SO_FIX_CONSISTENCY not in
12271 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12272 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
12273 " repaired" % storage_type,
12274 errors.ECODE_INVAL)
12276 def ExpandNames(self):
12277 self.needed_locks = {
12278 locking.LEVEL_NODE: [self.op.node_name],
12281 def _CheckFaultyDisks(self, instance, node_name):
12282 """Ensure faulty disks abort the opcode or at least warn."""
12284 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12286 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12287 " node '%s'" % (instance.name, node_name),
12288 errors.ECODE_STATE)
12289 except errors.OpPrereqError, err:
12290 if self.op.ignore_consistency:
12291 self.LogWarning(str(err.args[0]))
12295 def CheckPrereq(self):
12296 """Check prerequisites.
12299 # Check whether any instance on this node has faulty disks
12300 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12301 if inst.admin_state != constants.ADMINST_UP:
12303 check_nodes = set(inst.all_nodes)
12304 check_nodes.discard(self.op.node_name)
12305 for inst_node_name in check_nodes:
12306 self._CheckFaultyDisks(inst, inst_node_name)
12308 def Exec(self, feedback_fn):
12309 feedback_fn("Repairing storage unit '%s' on %s ..." %
12310 (self.op.name, self.op.node_name))
12312 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12313 result = self.rpc.call_storage_execute(self.op.node_name,
12314 self.op.storage_type, st_args,
12316 constants.SO_FIX_CONSISTENCY)
12317 result.Raise("Failed to repair storage unit '%s' on %s" %
12318 (self.op.name, self.op.node_name))
12321 class LUNodeEvacuate(NoHooksLU):
12322 """Evacuates instances off a list of nodes.
12327 _MODE2IALLOCATOR = {
12328 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12329 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12330 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12332 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12333 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12334 constants.IALLOCATOR_NEVAC_MODES)
12336 def CheckArguments(self):
12337 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12339 def ExpandNames(self):
12340 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12342 if self.op.remote_node is not None:
12343 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12344 assert self.op.remote_node
12346 if self.op.remote_node == self.op.node_name:
12347 raise errors.OpPrereqError("Cannot use the evacuated node as a new"
12348 " secondary node", errors.ECODE_INVAL)
12350 if self.op.mode != constants.NODE_EVAC_SEC:
12351 raise errors.OpPrereqError("Without the use of an iallocator, only"
12352 " secondary instances can be evacuated",
12353 errors.ECODE_INVAL)
12356 self.share_locks = _ShareAll()
12357 self.needed_locks = {
12358 locking.LEVEL_INSTANCE: [],
12359 locking.LEVEL_NODEGROUP: [],
12360 locking.LEVEL_NODE: [],
12363 # Determine nodes (via group) optimistically, needs verification once locks
12364 # have been acquired
12365 self.lock_nodes = self._DetermineNodes()
12367 def _DetermineNodes(self):
12368 """Gets the list of nodes to operate on.
12371 if self.op.remote_node is None:
12372 # Iallocator will choose any node(s) in the same group
12373 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12375 group_nodes = frozenset([self.op.remote_node])
12377 # Determine nodes to be locked
12378 return set([self.op.node_name]) | group_nodes
12380 def _DetermineInstances(self):
12381 """Builds list of instances to operate on.
12384 assert self.op.mode in constants.NODE_EVAC_MODES
12386 if self.op.mode == constants.NODE_EVAC_PRI:
12387 # Primary instances only
12388 inst_fn = _GetNodePrimaryInstances
12389 assert self.op.remote_node is None, \
12390 "Evacuating primary instances requires iallocator"
12391 elif self.op.mode == constants.NODE_EVAC_SEC:
12392 # Secondary instances only
12393 inst_fn = _GetNodeSecondaryInstances
12396 assert self.op.mode == constants.NODE_EVAC_ALL
12397 inst_fn = _GetNodeInstances
12398 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12400 raise errors.OpPrereqError("Due to an issue with the iallocator"
12401 " interface it is not possible to evacuate"
12402 " all instances at once; specify explicitly"
12403 " whether to evacuate primary or secondary"
12405 errors.ECODE_INVAL)
12407 return inst_fn(self.cfg, self.op.node_name)
12409 def DeclareLocks(self, level):
12410 if level == locking.LEVEL_INSTANCE:
12411 # Lock instances optimistically, needs verification once node and group
12412 # locks have been acquired
12413 self.needed_locks[locking.LEVEL_INSTANCE] = \
12414 set(i.name for i in self._DetermineInstances())
12416 elif level == locking.LEVEL_NODEGROUP:
12417 # Lock node groups for all potential target nodes optimistically, needs
12418 # verification once nodes have been acquired
12419 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12420 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12422 elif level == locking.LEVEL_NODE:
12423 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12425 def CheckPrereq(self):
12427 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12428 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12429 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12431 need_nodes = self._DetermineNodes()
12433 if not owned_nodes.issuperset(need_nodes):
12434 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12435 " locks were acquired, current nodes are"
12436 " are '%s', used to be '%s'; retry the"
12438 (self.op.node_name,
12439 utils.CommaJoin(need_nodes),
12440 utils.CommaJoin(owned_nodes)),
12441 errors.ECODE_STATE)
12443 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12444 if owned_groups != wanted_groups:
12445 raise errors.OpExecError("Node groups changed since locks were acquired,"
12446 " current groups are '%s', used to be '%s';"
12447 " retry the operation" %
12448 (utils.CommaJoin(wanted_groups),
12449 utils.CommaJoin(owned_groups)))
12451 # Determine affected instances
12452 self.instances = self._DetermineInstances()
12453 self.instance_names = [i.name for i in self.instances]
12455 if set(self.instance_names) != owned_instances:
12456 raise errors.OpExecError("Instances on node '%s' changed since locks"
12457 " were acquired, current instances are '%s',"
12458 " used to be '%s'; retry the operation" %
12459 (self.op.node_name,
12460 utils.CommaJoin(self.instance_names),
12461 utils.CommaJoin(owned_instances)))
12463 if self.instance_names:
12464 self.LogInfo("Evacuating instances from node '%s': %s",
12466 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12468 self.LogInfo("No instances to evacuate from node '%s'",
12471 if self.op.remote_node is not None:
12472 for i in self.instances:
12473 if i.primary_node == self.op.remote_node:
12474 raise errors.OpPrereqError("Node %s is the primary node of"
12475 " instance %s, cannot use it as"
12477 (self.op.remote_node, i.name),
12478 errors.ECODE_INVAL)
12480 def Exec(self, feedback_fn):
12481 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12483 if not self.instance_names:
12484 # No instances to evacuate
12487 elif self.op.iallocator is not None:
12488 # TODO: Implement relocation to other group
12489 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12490 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12491 instances=list(self.instance_names))
12492 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12494 ial.Run(self.op.iallocator)
12496 if not ial.success:
12497 raise errors.OpPrereqError("Can't compute node evacuation using"
12498 " iallocator '%s': %s" %
12499 (self.op.iallocator, ial.info),
12500 errors.ECODE_NORES)
12502 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12504 elif self.op.remote_node is not None:
12505 assert self.op.mode == constants.NODE_EVAC_SEC
12507 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12508 remote_node=self.op.remote_node,
12510 mode=constants.REPLACE_DISK_CHG,
12511 early_release=self.op.early_release)]
12512 for instance_name in self.instance_names]
12515 raise errors.ProgrammerError("No iallocator or remote node")
12517 return ResultWithJobs(jobs)
12520 def _SetOpEarlyRelease(early_release, op):
12521 """Sets C{early_release} flag on opcodes if available.
12525 op.early_release = early_release
12526 except AttributeError:
12527 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12532 def _NodeEvacDest(use_nodes, group, nodes):
12533 """Returns group or nodes depending on caller's choice.
12537 return utils.CommaJoin(nodes)
12542 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12543 """Unpacks the result of change-group and node-evacuate iallocator requests.
12545 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12546 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12548 @type lu: L{LogicalUnit}
12549 @param lu: Logical unit instance
12550 @type alloc_result: tuple/list
12551 @param alloc_result: Result from iallocator
12552 @type early_release: bool
12553 @param early_release: Whether to release locks early if possible
12554 @type use_nodes: bool
12555 @param use_nodes: Whether to display node names instead of groups
12558 (moved, failed, jobs) = alloc_result
12561 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12562 for (name, reason) in failed)
12563 lu.LogWarning("Unable to evacuate instances %s", failreason)
12564 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12567 lu.LogInfo("Instances to be moved: %s",
12568 utils.CommaJoin("%s (to %s)" %
12569 (name, _NodeEvacDest(use_nodes, group, nodes))
12570 for (name, group, nodes) in moved))
12572 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12573 map(opcodes.OpCode.LoadOpCode, ops))
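# For illustration, alloc_result as produced by the node-evacuate and
# change-group iallocator modes has the shape:
#   ([("inst1", "group1", ["node3"]), ...],  # moved
#    [("inst2", "<failure reason>"), ...],   # failed
#    [[<serialized opcode>, ...], ...])      # jobs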
12577 def _DiskSizeInBytesToMebibytes(lu, size):
12578 """Converts a disk size in bytes to mebibytes.
12580 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12583 (mib, remainder) = divmod(size, 1024 * 1024)
12586 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12587 " to not overwrite existing data (%s bytes will not be"
12588 " wiped)", (1024 * 1024) - remainder)
12594 class LUInstanceGrowDisk(LogicalUnit):
12595 """Grow a disk of an instance.
12598 HPATH = "disk-grow"
12599 HTYPE = constants.HTYPE_INSTANCE
12602 def ExpandNames(self):
12603 self._ExpandAndLockInstance()
12604 self.needed_locks[locking.LEVEL_NODE] = []
12605 self.needed_locks[locking.LEVEL_NODE_RES] = []
12606 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12607 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12609 def DeclareLocks(self, level):
12610 if level == locking.LEVEL_NODE:
12611 self._LockInstancesNodes()
12612 elif level == locking.LEVEL_NODE_RES:
12614 self.needed_locks[locking.LEVEL_NODE_RES] = \
12615 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12617 def BuildHooksEnv(self):
12618 """Build hooks env.
12620 This runs on the master, the primary and all the secondaries.
12624 "DISK": self.op.disk,
12625 "AMOUNT": self.op.amount,
12626 "ABSOLUTE": self.op.absolute,
12628 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12631 def BuildHooksNodes(self):
12632 """Build hooks nodes.
12635 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12638 def CheckPrereq(self):
12639 """Check prerequisites.
12641 This checks that the instance is in the cluster.
12644 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12645 assert instance is not None, \
12646 "Cannot retrieve locked instance %s" % self.op.instance_name
12647 nodenames = list(instance.all_nodes)
12648 for node in nodenames:
12649 _CheckNodeOnline(self, node)
12651 self.instance = instance
12653 if instance.disk_template not in constants.DTS_GROWABLE:
12654 raise errors.OpPrereqError("Instance's disk layout does not support"
12655 " growing", errors.ECODE_INVAL)
12657 self.disk = instance.FindDisk(self.op.disk)
12659 if self.op.absolute:
12660 self.target = self.op.amount
12661 self.delta = self.target - self.disk.size
12663 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12664 "current disk size (%s)" %
12665 (utils.FormatUnit(self.target, "h"),
12666 utils.FormatUnit(self.disk.size, "h")),
12667 errors.ECODE_STATE)
12669 self.delta = self.op.amount
12670 self.target = self.disk.size + self.delta
12672 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12673 utils.FormatUnit(self.delta, "h"),
12674 errors.ECODE_INVAL)
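# Example: growing a 10240 MiB disk with absolute=True and amount=15360
# yields target=15360 and delta=5120; the same growth in relative mode is
# requested as amount=5120, giving the same target.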
12676 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12678 def _CheckDiskSpace(self, nodenames, req_vgspace):
12679 template = self.instance.disk_template
12680 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12681 # TODO: check the free disk space for file, when that feature will be implemented
12683 nodes = map(self.cfg.GetNodeInfo, nodenames)
12684 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12687 # With exclusive storage we need to do something smarter than just looking
12688 # at free space; for now, let's simply abort the operation.
12689 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12690 " is enabled", errors.ECODE_STATE)
12691 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
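
  # Summary of the ordering above: every node first gets a dry-run grow,
  # then the backing storage is grown everywhere, and only afterwards the
  # logical (e.g. DRBD) device on the primary node, so early failures leave
  # the disk unchanged.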


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; ignored if C{None}

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
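
# Illustrative example: PrepareContainerMods([(constants.DDM_ADD, -1, {})],
# _InstNicModPrivate) yields [(constants.DDM_ADD, -1, {}, <private object>)],
# i.e. every 3-tuple is extended with a freshly constructed private object
# (or None when no private_fn is given).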


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
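
# Illustrative semantics (with all callbacks None, as in the prereq-time dry
# runs of LUInstanceSetParams): for container = ["a", "b"], the mod
# (constants.DDM_ADD, -1, "c", None) appends "c", (constants.DDM_ADD, 1,
# "c", None) inserts before "b", and (constants.DDM_REMOVE, -1, None, None)
# drops the last item.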


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )
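
# For example, _UpdateIvNames(0, disks) renames the volumes to "disk/0",
# "disk/1", ... in list order, keeping iv_names consistent after disks have
# been added or removed.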


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
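
  # Illustrative upgrade of the legacy 2-tuple format: [(constants.DDM_ADD,
  # {...})] becomes [(constants.DDM_ADD, -1, {...})], while [(0, {...})]
  # becomes [(constants.DDM_MODIFY, 0, {...})].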

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If a network is given, mode or link"
                                     " should not be given",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
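
  # Illustrative example: a NIC add with {"ip": "pool", "network": "net1"}
  # passes verification (the MAC defaults to constants.VALUE_AUTO), while
  # {"ip": "pool"} alone is rejected because no network is given.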

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
                            "hypervisor", "instance", "cluster")

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group in shared mode to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(_NICToTuple(self, n))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):

    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    elif new_mode == constants.NIC_MODE_OVS:
      # TODO: check OVS link
      self.LogInfo("OVS links are currently not checked for correctness")

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:

      def get_net_prefix(net):
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())

    # if there is a change in the nic-network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        # if IP is pool then require a network and generate one IP
        if new_ip.lower() == constants.NIC_IP_POOL:
          try:
            new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("Unable to get a free IP"
                                       " from the address pool",
                                       errors.ECODE_STATE)
          self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
          params[constants.INIC_IP] = new_ip
        elif new_ip != old_ip or new_net != old_net:
          if new_net:
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
          elif new_ip.lower() == constants.NIC_IP_POOL:
            raise errors.OpPrereqError("ip=pool, but no network found",
                                       errors.ECODE_INVAL)
          # the new network is None, so check for IP conflicts on the node
          elif self.op.conflicts_check:
            _CheckForConflictingIp(self, new_ip, pnode)

      # release the old IP if the old network is set
      if old_ip and old_net:
        try:
          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net)

    # there are no changes in (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def _PreCheckDiskTemplate(self, pnode_info):
    """CheckPrereq checks related to a new disk template."""
    # Arguments are passed to avoid configuration lookups
    instance = self.instance
    pnode = instance.primary_node
    cluster = self.cluster
    if instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 instance.disk_template, errors.ECODE_INVAL)

    if (instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node == pnode:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      _CheckNodeOnline(self, self.op.remote_node)
      _CheckNodeNotDrained(self, self.op.remote_node)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      # Make sure none of the nodes require exclusive storage
      nodes = [pnode_info]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        assert snode_info
        nodes.append(snode_info)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
                  " storage is enabled" % (instance.disk_template,
                                           self.op.disk_template))
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
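
  # Only the conversions listed in _DISK_CONVERSIONS at the bottom of this
  # class (plain <-> drbd) are supported, and converting to a mirrored
  # template additionally requires remote_node as the new secondary.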

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Check disk modifications. This is done here and not in CheckArguments
    # (as with NICs), because we need to know the instance's disk template
    if instance.disk_template == constants.DT_EXT:
      self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # Check the validity of the `provider' parameter
    if instance.disk_template == constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and"
                                       " parameter '%s' missing, during"
                                       " disk add" %
                                       (instance.disk_template,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      self._PreCheckDiskTemplate(pnode_info)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor], False)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None and self.op.offline:
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
                          msg="can't change to offline")

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
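
  # Illustrative example: raising BE_MAXMEM above the node group's ipolicy
  # maximum makes res_max non-empty, so the operation fails with
  # ECODE_INVAL unless the opcode sets ignore_ipolicy.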

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
    s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True, p_excl_stor)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller
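
  # In short: the new DRBD disk trees are generated first, the original LVs
  # are renamed into the data-volume slots, and only then are the DRBD
  # devices created on both nodes, so the instance data is preserved.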

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    net = params.get(constants.INIC_NETWORK, None)
    #TODO: not private.filled?? can a nic have no nicparams??
    nicparams = private.filled

    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK],
        net)),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled

      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance.name,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
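
  # Illustrative result shape: {"node1": ["export1", "export2"],
  # "node2": False}, where False marks a node whose export list could not
  # be retrieved.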
14406 class _ExportQuery(_QueryBase):
14407 FIELDS = query.EXPORT_FIELDS
14409 #: The node name is not a unique key for this query
14410 SORT_FIELD = "node"
14412 def ExpandNames(self, lu):
14413 lu.needed_locks = {}
14415 # The following variables interact with _QueryBase._GetNames
14416 if self.names:
14417 self.wanted = _GetWantedNodes(lu, self.names)
14418 else:
14419 self.wanted = locking.ALL_SET
14421 self.do_locking = self.use_locking
14423 if self.do_locking:
14424 lu.share_locks = _ShareAll()
14425 lu.needed_locks = {
14426 locking.LEVEL_NODE: self.wanted,
14427 }
14429 if not self.names:
14430 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14432 def DeclareLocks(self, lu, level):
14433 pass
14435 def _GetQueryData(self, lu):
14436 """Computes the list of nodes and their attributes.
14439 # Locking is not used
14441 assert not (compat.any(lu.glm.is_owned(level)
14442 for level in locking.LEVELS
14443 if level != locking.LEVEL_CLUSTER) or
14444 self.do_locking or self.use_locking)
14446 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14448 result = []
14450 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14451 if nres.fail_msg:
14452 result.append((node, None))
14453 else:
14454 result.extend((node, expname) for expname in nres.payload)
14456 return result
14459 class LUBackupPrepare(NoHooksLU):
14460 """Prepares an instance for an export and returns useful information.
14465 def ExpandNames(self):
14466 self._ExpandAndLockInstance()
14468 def CheckPrereq(self):
14469 """Check prerequisites.
14472 instance_name = self.op.instance_name
14474 self.instance = self.cfg.GetInstanceInfo(instance_name)
14475 assert self.instance is not None, \
14476 "Cannot retrieve locked instance %s" % self.op.instance_name
14477 _CheckNodeOnline(self, self.instance.primary_node)
14479 self._cds = _GetClusterDomainSecret()
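# The cluster domain secret obtained here is used in Exec to authenticate
# everything handed to the destination cluster: the handshake message, the
# HMAC-signed X509 key name and the signed CA.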
14481 def Exec(self, feedback_fn):
14482 """Prepares an instance for an export.
14485 instance = self.instance
14487 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14488 salt = utils.GenerateSecret(8)
14490 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14491 result = self.rpc.call_x509_cert_create(instance.primary_node,
14492 constants.RIE_CERT_VALIDITY)
14493 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14495 (name, cert_pem) = result.payload
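# The RPC result carries the new key's name and the certificate in PEM
# format; the private key itself is created and kept on the node.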
14497 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14498 cert_pem)
14500 return {
14501 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14502 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14503 salt),
14504 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14505 }
14507 return None
14510 class LUBackupExport(LogicalUnit):
14511 """Export an instance to an image in the cluster.
14514 HPATH = "instance-export"
14515 HTYPE = constants.HTYPE_INSTANCE
14518 def CheckArguments(self):
14519 """Check the arguments.
14522 self.x509_key_name = self.op.x509_key_name
14523 self.dest_x509_ca_pem = self.op.destination_x509_ca
14525 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14526 if not self.x509_key_name:
14527 raise errors.OpPrereqError("Missing X509 key name for encryption",
14528 errors.ECODE_INVAL)
14530 if not self.dest_x509_ca_pem:
14531 raise errors.OpPrereqError("Missing destination X509 CA",
14532 errors.ECODE_INVAL)
14534 def ExpandNames(self):
14535 self._ExpandAndLockInstance()
14537 # Lock all nodes for local exports
14538 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14539 # FIXME: lock only instance primary and destination node
14540 #
14541 # Sad but true, for now we have to lock all nodes, as we don't know where
14542 # the previous export might be, and in this LU we search for it and
14543 # remove it from its current node. In the future we could fix this by:
14544 # - making a tasklet to search (share-lock all), then create the
14545 # new one, then one to remove, after
14546 # - removing the removal operation altogether
14547 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14549 # Allocations should be stopped while this LU runs with node locks, but
14550 # it doesn't have to be exclusive
14551 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14552 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14554 def DeclareLocks(self, level):
14555 """Last minute lock declaration."""
14556 # All nodes are locked anyway, so nothing to do here.
14558 def BuildHooksEnv(self):
14559 """Build hooks env.
14561 This will run on the master, primary node and target node.
14563 """
14564 env = {
14565 "EXPORT_MODE": self.op.mode,
14566 "EXPORT_NODE": self.op.target_node,
14567 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14568 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14569 # TODO: Generic function for boolean env variables
14570 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14571 }
14573 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14575 return env
14577 def BuildHooksNodes(self):
14578 """Build hooks nodes.
14581 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14583 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14584 nl.append(self.op.target_node)
14586 return (nl, nl)
14588 def CheckPrereq(self):
14589 """Check prerequisites.
14591 This checks that the instance and node names are valid.
14594 instance_name = self.op.instance_name
14596 self.instance = self.cfg.GetInstanceInfo(instance_name)
14597 assert self.instance is not None, \
14598 "Cannot retrieve locked instance %s" % self.op.instance_name
14599 _CheckNodeOnline(self, self.instance.primary_node)
14601 if (self.op.remove_instance and
14602 self.instance.admin_state == constants.ADMINST_UP and
14603 not self.op.shutdown):
14604 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14605 " down first", errors.ECODE_STATE)
14607 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14608 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14609 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14610 assert self.dst_node is not None
14612 _CheckNodeOnline(self, self.dst_node.name)
14613 _CheckNodeNotDrained(self, self.dst_node.name)
14615 self._cds = None
14616 self.dest_disk_info = None
14617 self.dest_x509_ca = None
14619 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14620 self.dst_node = None
14622 if len(self.op.target_node) != len(self.instance.disks):
14623 raise errors.OpPrereqError(("Received destination information for %s"
14624 " disks, but instance %s has %s disks") %
14625 (len(self.op.target_node), instance_name,
14626 len(self.instance.disks)),
14627 errors.ECODE_INVAL)
14629 cds = _GetClusterDomainSecret()
14631 # Check X509 key name
14632 try:
14633 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14634 except (TypeError, ValueError), err:
14635 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14636 errors.ECODE_INVAL)
14638 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14639 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14640 errors.ECODE_INVAL)
14642 # Load and verify CA
14643 try:
14644 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14645 except OpenSSL.crypto.Error, err:
14646 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14647 (err, ), errors.ECODE_INVAL)
14649 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14650 if errcode is not None:
14651 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14652 (msg, ), errors.ECODE_INVAL)
14654 self.dest_x509_ca = cert
14656 # Verify target information
14657 disk_info = []
14658 for idx, disk_data in enumerate(self.op.target_node):
14659 try:
14660 (host, port, magic) = \
14661 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14662 except errors.GenericError, err:
14663 raise errors.OpPrereqError("Target info for disk %s: %s" %
14664 (idx, err), errors.ECODE_INVAL)
14666 disk_info.append((host, port, magic))
14668 assert len(disk_info) == len(self.op.target_node)
14669 self.dest_disk_info = disk_info
14671 else:
14672 raise errors.ProgrammerError("Unhandled export mode %r" %
14673 self.op.mode)
14675 # instance disk type verification
14676 # TODO: Implement export support for file-based disks
14677 for disk in self.instance.disks:
14678 if disk.dev_type == constants.LD_FILE:
14679 raise errors.OpPrereqError("Export not supported for instances with"
14680 " file-based disks", errors.ECODE_INVAL)
14682 def _CleanupExports(self, feedback_fn):
14683 """Removes exports of current instance from all other nodes.
14685 If an instance in a cluster with nodes A..D was exported to node C, its
14686 exports will be removed from the nodes A, B and D.
14689 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14691 nodelist = self.cfg.GetNodeList()
14692 nodelist.remove(self.dst_node.name)
14694 # on one-node clusters nodelist will be empty after the removal
14695 # if we proceeded, the backup would be removed because OpBackupQuery
14696 # substitutes an empty list with the full cluster node list.
14697 iname = self.instance.name
14698 if nodelist:
14699 feedback_fn("Removing old exports for instance %s" % iname)
14700 exportlist = self.rpc.call_export_list(nodelist)
14701 for node in exportlist:
14702 if exportlist[node].fail_msg:
14703 continue
14704 if iname in exportlist[node].payload:
14705 msg = self.rpc.call_export_remove(node, iname).fail_msg
14706 if msg:
14707 self.LogWarning("Could not remove older export for instance %s"
14708 " on node %s: %s", iname, node, msg)
14710 def Exec(self, feedback_fn):
14711 """Export an instance to an image in the cluster.
14714 assert self.op.mode in constants.EXPORT_MODES
14716 instance = self.instance
14717 src_node = instance.primary_node
14719 if self.op.shutdown:
14720 # shutdown the instance, but not the disks
14721 feedback_fn("Shutting down instance %s" % instance.name)
14722 result = self.rpc.call_instance_shutdown(src_node, instance,
14723 self.op.shutdown_timeout)
14724 # TODO: Maybe ignore failures if ignore_remove_failures is set
14725 result.Raise("Could not shutdown instance %s on"
14726 " node %s" % (instance.name, src_node))
14728 # set the disks ID correctly since call_instance_start needs the
14729 # correct drbd minor to create the symlinks
14730 for disk in instance.disks:
14731 self.cfg.SetDiskID(disk, src_node)
14733 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14735 if activate_disks:
14736 # Activate the instance disks if we're exporting a stopped instance
14737 feedback_fn("Activating disks for %s" % instance.name)
14738 _StartInstanceDisks(self, instance, None)
14740 try:
14741 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14742 instance)
14744 helper.CreateSnapshots()
14745 try:
14746 if (self.op.shutdown and
14747 instance.admin_state == constants.ADMINST_UP and
14748 not self.op.remove_instance):
14749 assert not activate_disks
14750 feedback_fn("Starting instance %s" % instance.name)
14751 result = self.rpc.call_instance_start(src_node,
14752 (instance, None, None), False)
14753 msg = result.fail_msg
14754 if msg:
14755 feedback_fn("Failed to start instance: %s" % msg)
14756 _ShutdownInstanceDisks(self, instance)
14757 raise errors.OpExecError("Could not start instance: %s" % msg)
14759 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14760 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14761 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14762 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14763 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
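# Presumably only the connection timeout needs tuning here; the other
# import/export timeouts keep the ImportExportTimeouts defaults.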
14765 (key_name, _, _) = self.x509_key_name
14767 dest_ca_pem = \
14768 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14769 self.dest_x509_ca)
14771 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14772 key_name, dest_ca_pem,
14773 timeouts)
14774 finally:
14775 helper.Cleanup()
14777 # Check for backwards compatibility
14778 assert len(dresults) == len(instance.disks)
14779 assert compat.all(isinstance(i, bool) for i in dresults), \
14780 "Not all results are boolean: %r" % dresults
14782 finally:
14783 if activate_disks:
14784 feedback_fn("Deactivating disks for %s" % instance.name)
14785 _ShutdownInstanceDisks(self, instance)
14787 if not (compat.all(dresults) and fin_resu):
14788 failures = []
14789 if not fin_resu:
14790 failures.append("export finalization")
14791 if not compat.all(dresults):
14792 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14793 if not dsk)
14794 failures.append("disk export: disk(s) %s" % fdsk)
14796 raise errors.OpExecError("Export failed, errors in %s" %
14797 utils.CommaJoin(failures))
14799 # At this point, the export was successful, we can cleanup/finish
14801 # Remove instance if requested
14802 if self.op.remove_instance:
14803 feedback_fn("Removing instance %s" % instance.name)
14804 _RemoveInstance(self, feedback_fn, instance,
14805 self.op.ignore_remove_failures)
14807 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14808 self._CleanupExports(feedback_fn)
14810 return fin_resu, dresults
14813 class LUBackupRemove(NoHooksLU):
14814 """Remove exports related to the named instance.
14819 def ExpandNames(self):
14820 self.needed_locks = {
14821 # We need all nodes to be locked in order for RemoveExport to work, but
14822 # we don't need to lock the instance itself, as nothing will happen to it
14823 # (and we can remove exports also for a removed instance)
14824 locking.LEVEL_NODE: locking.ALL_SET,
14826 # Removing backups is quick, so blocking allocations is justified
14827 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14830 # Allocations should be stopped while this LU runs with node locks, but it
14831 # doesn't have to be exclusive
14832 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14834 def Exec(self, feedback_fn):
14835 """Remove any export.
14838 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14839 # If the instance was not found we'll try with the name that was passed in.
14840 # This will only work if it was an FQDN, though.
14841 fqdn_warn = False
14842 if not instance_name:
14843 fqdn_warn = True
14844 instance_name = self.op.instance_name
14846 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14847 exportlist = self.rpc.call_export_list(locked_nodes)
14848 found = False
14849 for node in exportlist:
14850 msg = exportlist[node].fail_msg
14851 if msg:
14852 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14853 continue
14854 if instance_name in exportlist[node].payload:
14855 found = True
14856 result = self.rpc.call_export_remove(node, instance_name)
14857 msg = result.fail_msg
14858 if msg:
14859 logging.error("Could not remove export for instance %s"
14860 " on node %s: %s", instance_name, node, msg)
14862 if fqdn_warn and not found:
14863 feedback_fn("Export not found. If trying to remove an export belonging"
14864 " to a deleted instance please use its Fully Qualified"
14865 " Name.")
14868 class LUGroupAdd(LogicalUnit):
14869 """Logical unit for creating node groups.
14872 HPATH = "group-add"
14873 HTYPE = constants.HTYPE_GROUP
14876 def ExpandNames(self):
14877 # We need the new group's UUID here so that we can create and acquire the
14878 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14879 # that it should not check whether the UUID exists in the configuration.
14880 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14881 self.needed_locks = {}
14882 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14884 def CheckPrereq(self):
14885 """Check prerequisites.
14887 This checks that the given group name is not an existing node group
14888 already.
14890 """
14891 try:
14892 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14893 except errors.OpPrereqError:
14894 pass
14895 else:
14896 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14897 " node group (UUID: %s)" %
14898 (self.op.group_name, existing_uuid),
14899 errors.ECODE_EXISTS)
14901 if self.op.ndparams:
14902 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14904 if self.op.hv_state:
14905 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14906 else:
14907 self.new_hv_state = None
14909 if self.op.disk_state:
14910 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14911 else:
14912 self.new_disk_state = None
14914 if self.op.diskparams:
14915 for templ in constants.DISK_TEMPLATES:
14916 if templ in self.op.diskparams:
14917 utils.ForceDictType(self.op.diskparams[templ],
14918 constants.DISK_DT_TYPES)
14919 self.new_diskparams = self.op.diskparams
14920 try:
14921 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14922 except errors.OpPrereqError, err:
14923 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14924 errors.ECODE_INVAL)
14925 else:
14926 self.new_diskparams = {}
14928 if self.op.ipolicy:
14929 cluster = self.cfg.GetClusterInfo()
14930 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14931 try:
14932 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14933 except errors.ConfigurationError, err:
14934 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14935 errors.ECODE_INVAL)
14937 def BuildHooksEnv(self):
14938 """Build hooks env.
14940 """
14941 return {
14942 "GROUP_NAME": self.op.group_name,
14943 }
14945 def BuildHooksNodes(self):
14946 """Build hooks nodes.
14949 mn = self.cfg.GetMasterNode()
14950 return ([mn], [mn])
14952 def Exec(self, feedback_fn):
14953 """Add the node group to the cluster.
14956 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14957 uuid=self.group_uuid,
14958 alloc_policy=self.op.alloc_policy,
14959 ndparams=self.op.ndparams,
14960 diskparams=self.new_diskparams,
14961 ipolicy=self.op.ipolicy,
14962 hv_state_static=self.new_hv_state,
14963 disk_state_static=self.new_disk_state)
14965 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14966 del self.remove_locks[locking.LEVEL_NODEGROUP]
14969 class LUGroupAssignNodes(NoHooksLU):
14970 """Logical unit for assigning nodes to groups.
14975 def ExpandNames(self):
14976 # These raise errors.OpPrereqError on their own:
14977 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14978 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14980 # We want to lock all the affected nodes and groups. We have readily
14981 # available the list of nodes, and the *destination* group. To gather the
14982 # list of "source" groups, we need to fetch node information later on.
14983 self.needed_locks = {
14984 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14985 locking.LEVEL_NODE: self.op.nodes,
14988 def DeclareLocks(self, level):
14989 if level == locking.LEVEL_NODEGROUP:
14990 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14992 # Try to get all affected nodes' groups without having the group or node
14993 # lock yet. Needs verification later in the code flow.
14994 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14996 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14998 def CheckPrereq(self):
14999 """Check prerequisites.
15002 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15003 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15004 frozenset(self.op.nodes))
15006 expected_locks = (set([self.group_uuid]) |
15007 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15008 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15009 if actual_locks != expected_locks:
15010 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15011 " current groups are '%s', used to be '%s'" %
15012 (utils.CommaJoin(expected_locks),
15013 utils.CommaJoin(actual_locks)))
15015 self.node_data = self.cfg.GetAllNodesInfo()
15016 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15017 instance_data = self.cfg.GetAllInstancesInfo()
15019 if self.group is None:
15020 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15021 (self.op.group_name, self.group_uuid))
15023 (new_splits, previous_splits) = \
15024 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15025 for node in self.op.nodes],
15026 self.node_data, instance_data)
15028 if new_splits:
15029 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15031 if not self.op.force:
15032 raise errors.OpExecError("The following instances get split by this"
15033 " change and --force was not given: %s" %
15034 fmt_new_splits)
15035 else:
15036 self.LogWarning("This operation will split the following instances: %s",
15037 fmt_new_splits)
15039 if previous_splits:
15040 self.LogWarning("In addition, these already-split instances continue"
15041 " to be split across groups: %s",
15042 utils.CommaJoin(utils.NiceSort(previous_splits)))
15044 def Exec(self, feedback_fn):
15045 """Assign nodes to a new group.
15048 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
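# Each entry is a (node_name, target_group_uuid) pair; the configuration
# applies the whole assignment in one step.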
15050 self.cfg.AssignGroupNodes(mods)
15052 @staticmethod
15053 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15054 """Check for split instances after a node assignment.
15056 This method considers a series of node assignments as an atomic operation,
15057 and returns information about split instances after applying the set of
15058 changes.
15060 In particular, it returns information about newly split instances, and
15061 instances that were already split, and remain so after the change.
15063 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15064 considered.
15066 @type changes: list of (node_name, new_group_uuid) pairs.
15067 @param changes: list of node assignments to consider.
15068 @param node_data: a dict with data for all nodes
15069 @param instance_data: a dict with all instances to consider
15070 @rtype: a two-tuple
15071 @return: a list of instances that were previously okay and result split as a
15072 consequence of this change, and a list of instances that were previously
15073 split and this change does not fix.
15076 changed_nodes = dict((node, group) for node, group in changes
15077 if node_data[node].group != group)
15079 all_split_instances = set()
15080 previously_split_instances = set()
15082 def InstanceNodes(instance):
15083 return [instance.primary_node] + list(instance.secondary_nodes)
15085 for inst in instance_data.values():
15086 if inst.disk_template not in constants.DTS_INT_MIRROR:
15087 continue
15089 instance_nodes = InstanceNodes(inst)
15091 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15092 previously_split_instances.add(inst.name)
15094 if len(set(changed_nodes.get(node, node_data[node].group)
15095 for node in instance_nodes)) > 1:
15096 all_split_instances.add(inst.name)
15098 return (list(all_split_instances - previously_split_instances),
15099 list(previously_split_instances & all_split_instances))
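# A hypothetical example of the computation above: with nodes "n1" and "n2"
# both in group "g1" and a DRBD instance on (n1, n2), the change
# [("n2", "g2")] makes the instance's node set span two groups, so it is
# returned in the first (newly split) list; an instance whose nodes already
# straddled two groups, and still do, ends up in the second list instead.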
15102 class _GroupQuery(_QueryBase):
15103 FIELDS = query.GROUP_FIELDS
15105 def ExpandNames(self, lu):
15106 lu.needed_locks = {}
15108 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15109 self._cluster = lu.cfg.GetClusterInfo()
15110 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15112 if not self.names:
15113 self.wanted = [name_to_uuid[name]
15114 for name in utils.NiceSort(name_to_uuid.keys())]
15115 else:
15116 # Accept names to be either names or UUIDs.
15117 missing = []
15118 self.wanted = []
15119 all_uuid = frozenset(self._all_groups.keys())
15121 for name in self.names:
15122 if name in all_uuid:
15123 self.wanted.append(name)
15124 elif name in name_to_uuid:
15125 self.wanted.append(name_to_uuid[name])
15126 else:
15127 missing.append(name)
15129 if missing:
15130 raise errors.OpPrereqError("Some groups do not exist: %s" %
15131 utils.CommaJoin(missing),
15132 errors.ECODE_NOENT)
15134 def DeclareLocks(self, lu, level):
15135 pass
15137 def _GetQueryData(self, lu):
15138 """Computes the list of node groups and their attributes.
15141 do_nodes = query.GQ_NODE in self.requested_data
15142 do_instances = query.GQ_INST in self.requested_data
15144 group_to_nodes = None
15145 group_to_instances = None
15147 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15148 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15149 # latter GetAllInstancesInfo() is not enough, for we have to go through
15150 # instance->node. Hence, we will need to process nodes even if we only need
15151 # instance information.
15152 if do_nodes or do_instances:
15153 all_nodes = lu.cfg.GetAllNodesInfo()
15154 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15155 node_to_group = {}
15157 for node in all_nodes.values():
15158 if node.group in group_to_nodes:
15159 group_to_nodes[node.group].append(node.name)
15160 node_to_group[node.name] = node.group
15162 if do_instances:
15163 all_instances = lu.cfg.GetAllInstancesInfo()
15164 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15166 for instance in all_instances.values():
15167 node = instance.primary_node
15168 if node in node_to_group:
15169 group_to_instances[node_to_group[node]].append(instance.name)
15171 if not do_nodes:
15172 # Do not pass on node information if it was not requested.
15173 group_to_nodes = None
15175 return query.GroupQueryData(self._cluster,
15176 [self._all_groups[uuid]
15177 for uuid in self.wanted],
15178 group_to_nodes, group_to_instances,
15179 query.GQ_DISKPARAMS in self.requested_data)
15182 class LUGroupQuery(NoHooksLU):
15183 """Logical unit for querying node groups.
15188 def CheckArguments(self):
15189 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15190 self.op.output_fields, False)
15192 def ExpandNames(self):
15193 self.gq.ExpandNames(self)
15195 def DeclareLocks(self, level):
15196 self.gq.DeclareLocks(self, level)
15198 def Exec(self, feedback_fn):
15199 return self.gq.OldStyleQuery(self)
15202 class LUGroupSetParams(LogicalUnit):
15203 """Modifies the parameters of a node group.
15206 HPATH = "group-modify"
15207 HTYPE = constants.HTYPE_GROUP
15210 def CheckArguments(self):
15211 all_changes = [
15212 self.op.ndparams,
15213 self.op.diskparams,
15214 self.op.alloc_policy,
15215 self.op.hv_state,
15216 self.op.disk_state,
15217 self.op.ipolicy,
15218 ]
15220 if all_changes.count(None) == len(all_changes):
15221 raise errors.OpPrereqError("Please pass at least one modification",
15222 errors.ECODE_INVAL)
15224 def ExpandNames(self):
15225 # This raises errors.OpPrereqError on its own:
15226 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15228 self.needed_locks = {
15229 locking.LEVEL_INSTANCE: [],
15230 locking.LEVEL_NODEGROUP: [self.group_uuid],
15233 self.share_locks[locking.LEVEL_INSTANCE] = 1
15235 def DeclareLocks(self, level):
15236 if level == locking.LEVEL_INSTANCE:
15237 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15239 # Lock instances optimistically, needs verification once group lock has
15240 # been acquired
15241 self.needed_locks[locking.LEVEL_INSTANCE] = \
15242 self.cfg.GetNodeGroupInstances(self.group_uuid)
15244 @staticmethod
15245 def _UpdateAndVerifyDiskParams(old, new):
15246 """Updates and verifies disk parameters.
15249 new_params = _GetUpdatedParams(old, new)
15250 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15251 return new_params
15253 def CheckPrereq(self):
15254 """Check prerequisites.
15257 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15259 # Check if locked instances are still correct
15260 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15262 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15263 cluster = self.cfg.GetClusterInfo()
15265 if self.group is None:
15266 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15267 (self.op.group_name, self.group_uuid))
15269 if self.op.ndparams:
15270 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15271 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15272 self.new_ndparams = new_ndparams
15274 if self.op.diskparams:
15275 diskparams = self.group.diskparams
15276 uavdp = self._UpdateAndVerifyDiskParams
15277 # For each disktemplate subdict update and verify the values
15278 new_diskparams = dict((dt,
15279 uavdp(diskparams.get(dt, {}),
15280 self.op.diskparams[dt]))
15281 for dt in constants.DISK_TEMPLATES
15282 if dt in self.op.diskparams)
15283 # As we have all subdicts of diskparams ready, let's merge the actual
15284 # dict with all updated subdicts
15285 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15286 try:
15287 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15288 except errors.OpPrereqError, err:
15289 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15290 errors.ECODE_INVAL)
15292 if self.op.hv_state:
15293 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15294 self.group.hv_state_static)
15296 if self.op.disk_state:
15297 self.new_disk_state = \
15298 _MergeAndVerifyDiskState(self.op.disk_state,
15299 self.group.disk_state_static)
15301 if self.op.ipolicy:
15302 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15303 self.op.ipolicy,
15304 group_policy=True)
15306 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15307 inst_filter = lambda inst: inst.name in owned_instances
15308 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15309 gmi = ganeti.masterd.instance
15310 violations = \
15311 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15312 self.group),
15313 new_ipolicy, instances)
15315 if violations:
15316 self.LogWarning("After the ipolicy change the following instances"
15317 " violate them: %s",
15318 utils.CommaJoin(violations))
15320 def BuildHooksEnv(self):
15321 """Build hooks env.
15323 """
15324 return {
15325 "GROUP_NAME": self.op.group_name,
15326 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15327 }
15329 def BuildHooksNodes(self):
15330 """Build hooks nodes.
15333 mn = self.cfg.GetMasterNode()
15334 return ([mn], [mn])
15336 def Exec(self, feedback_fn):
15337 """Modifies the node group.
15339 """
15340 result = []
15342 if self.op.ndparams:
15343 self.group.ndparams = self.new_ndparams
15344 result.append(("ndparams", str(self.group.ndparams)))
15346 if self.op.diskparams:
15347 self.group.diskparams = self.new_diskparams
15348 result.append(("diskparams", str(self.group.diskparams)))
15350 if self.op.alloc_policy:
15351 self.group.alloc_policy = self.op.alloc_policy
15353 if self.op.hv_state:
15354 self.group.hv_state_static = self.new_hv_state
15356 if self.op.disk_state:
15357 self.group.disk_state_static = self.new_disk_state
15359 if self.op.ipolicy:
15360 self.group.ipolicy = self.new_ipolicy
15362 self.cfg.Update(self.group, feedback_fn)
15364 return result
15366 class LUGroupRemove(LogicalUnit):
15367 HPATH = "group-remove"
15368 HTYPE = constants.HTYPE_GROUP
15371 def ExpandNames(self):
15372 # This raises errors.OpPrereqError on its own:
15373 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15374 self.needed_locks = {
15375 locking.LEVEL_NODEGROUP: [self.group_uuid],
15378 def CheckPrereq(self):
15379 """Check prerequisites.
15381 This checks that the given group name exists as a node group, that it is
15382 empty (i.e., contains no nodes), and that it is not the last group of the
15383 cluster.
15385 """
15386 # Verify that the group is empty.
15387 group_nodes = [node.name
15388 for node in self.cfg.GetAllNodesInfo().values()
15389 if node.group == self.group_uuid]
15391 if group_nodes:
15392 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15393 " nodes: %s" %
15394 (self.op.group_name,
15395 utils.CommaJoin(utils.NiceSort(group_nodes))),
15396 errors.ECODE_STATE)
15398 # Verify the cluster would not be left group-less.
15399 if len(self.cfg.GetNodeGroupList()) == 1:
15400 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15401 " removed" % self.op.group_name,
15402 errors.ECODE_STATE)
15404 def BuildHooksEnv(self):
15405 """Build hooks env.
15407 """
15408 return {
15409 "GROUP_NAME": self.op.group_name,
15410 }
15412 def BuildHooksNodes(self):
15413 """Build hooks nodes.
15416 mn = self.cfg.GetMasterNode()
15417 return ([mn], [mn])
15419 def Exec(self, feedback_fn):
15420 """Remove the node group.
15422 """
15423 try:
15424 self.cfg.RemoveNodeGroup(self.group_uuid)
15425 except errors.ConfigurationError:
15426 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15427 (self.op.group_name, self.group_uuid))
15429 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15432 class LUGroupRename(LogicalUnit):
15433 HPATH = "group-rename"
15434 HTYPE = constants.HTYPE_GROUP
15437 def ExpandNames(self):
15438 # This raises errors.OpPrereqError on its own:
15439 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15441 self.needed_locks = {
15442 locking.LEVEL_NODEGROUP: [self.group_uuid],
15445 def CheckPrereq(self):
15446 """Check prerequisites.
15448 Ensures requested new name is not yet used.
15450 """
15451 try:
15452 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15453 except errors.OpPrereqError:
15454 pass
15455 else:
15456 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15457 " node group (UUID: %s)" %
15458 (self.op.new_name, new_name_uuid),
15459 errors.ECODE_EXISTS)
15461 def BuildHooksEnv(self):
15462 """Build hooks env.
15464 """
15465 return {
15466 "OLD_NAME": self.op.group_name,
15467 "NEW_NAME": self.op.new_name,
15468 }
15470 def BuildHooksNodes(self):
15471 """Build hooks nodes.
15474 mn = self.cfg.GetMasterNode()
15476 all_nodes = self.cfg.GetAllNodesInfo()
15477 all_nodes.pop(mn, None)
15479 run_nodes = [mn]
15480 run_nodes.extend(node.name for node in all_nodes.values()
15481 if node.group == self.group_uuid)
15483 return (run_nodes, run_nodes)
15485 def Exec(self, feedback_fn):
15486 """Rename the node group.
15489 group = self.cfg.GetNodeGroup(self.group_uuid)
15491 if group is None:
15492 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15493 (self.op.group_name, self.group_uuid))
15495 group.name = self.op.new_name
15496 self.cfg.Update(group, feedback_fn)
15498 return self.op.new_name
15501 class LUGroupEvacuate(LogicalUnit):
15502 HPATH = "group-evacuate"
15503 HTYPE = constants.HTYPE_GROUP
15506 def ExpandNames(self):
15507 # This raises errors.OpPrereqError on its own:
15508 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15510 if self.op.target_groups:
15511 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15512 self.op.target_groups)
15513 else:
15514 self.req_target_uuids = []
15516 if self.group_uuid in self.req_target_uuids:
15517 raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
15518 " as a target group (targets are %s)" %
15519 (self.group_uuid,
15520 utils.CommaJoin(self.req_target_uuids)),
15521 errors.ECODE_INVAL)
15523 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15525 self.share_locks = _ShareAll()
15526 self.needed_locks = {
15527 locking.LEVEL_INSTANCE: [],
15528 locking.LEVEL_NODEGROUP: [],
15529 locking.LEVEL_NODE: [],
15532 def DeclareLocks(self, level):
15533 if level == locking.LEVEL_INSTANCE:
15534 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15536 # Lock instances optimistically, needs verification once node and group
15537 # locks have been acquired
15538 self.needed_locks[locking.LEVEL_INSTANCE] = \
15539 self.cfg.GetNodeGroupInstances(self.group_uuid)
15541 elif level == locking.LEVEL_NODEGROUP:
15542 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15544 if self.req_target_uuids:
15545 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15546 else:
15547 # Lock all groups used by instances optimistically; this requires going
15548 # via the node before it's locked, requiring verification later on
15549 lock_groups.update(group_uuid
15550 for instance_name in
15551 self.owned_locks(locking.LEVEL_INSTANCE)
15552 for group_uuid in
15553 self.cfg.GetInstanceNodeGroups(instance_name))
15554 else:
15555 # No target groups, need to lock all of them
15556 lock_groups = locking.ALL_SET
15558 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15560 elif level == locking.LEVEL_NODE:
15561 # This will only lock the nodes in the group to be evacuated which
15562 # contain actual instances
15563 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15564 self._LockInstancesNodes()
15566 # Lock all nodes in group to be evacuated and target groups
15567 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15568 assert self.group_uuid in owned_groups
15569 member_nodes = [node_name
15570 for group in owned_groups
15571 for node_name in self.cfg.GetNodeGroup(group).members]
15572 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
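# Nodes of the target groups are locked as well: the evacuation jobs
# created in Exec will move instances onto them.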
15574 def CheckPrereq(self):
15575 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15576 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15577 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15579 assert owned_groups.issuperset(self.req_target_uuids)
15580 assert self.group_uuid in owned_groups
15582 # Check if locked instances are still correct
15583 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15585 # Get instance information
15586 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15588 # Check if node groups for locked instances are still correct
15589 _CheckInstancesNodeGroups(self.cfg, self.instances,
15590 owned_groups, owned_nodes, self.group_uuid)
15592 if self.req_target_uuids:
15593 # User requested specific target groups
15594 self.target_uuids = self.req_target_uuids
15595 else:
15596 # All groups except the one to be evacuated are potential targets
15597 self.target_uuids = [group_uuid for group_uuid in owned_groups
15598 if group_uuid != self.group_uuid]
15600 if not self.target_uuids:
15601 raise errors.OpPrereqError("There are no possible target groups",
15602 errors.ECODE_INVAL)
15604 def BuildHooksEnv(self):
15605 """Build hooks env.
15607 """
15608 return {
15609 "GROUP_NAME": self.op.group_name,
15610 "TARGET_GROUPS": " ".join(self.target_uuids),
15611 }
15613 def BuildHooksNodes(self):
15614 """Build hooks nodes.
15617 mn = self.cfg.GetMasterNode()
15619 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15621 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15623 return (run_nodes, run_nodes)
15625 def Exec(self, feedback_fn):
15626 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15628 assert self.group_uuid not in self.target_uuids
15630 req = iallocator.IAReqGroupChange(instances=instances,
15631 target_groups=self.target_uuids)
15632 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15634 ial.Run(self.op.iallocator)
15636 if not ial.success:
15637 raise errors.OpPrereqError("Can't compute group evacuation using"
15638 " iallocator '%s': %s" %
15639 (self.op.iallocator, ial.info),
15640 errors.ECODE_NORES)
15642 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15644 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15645 len(jobs), self.op.group_name)
15647 return ResultWithJobs(jobs)
15650 class TagsLU(NoHooksLU): # pylint: disable=W0223
15651 """Generic tags LU.
15653 This is an abstract class which is the parent of all the other tags LUs.
15656 def ExpandNames(self):
15657 self.group_uuid = None
15658 self.needed_locks = {}
15660 if self.op.kind == constants.TAG_NODE:
15661 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15662 lock_level = locking.LEVEL_NODE
15663 lock_name = self.op.name
15664 elif self.op.kind == constants.TAG_INSTANCE:
15665 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15666 lock_level = locking.LEVEL_INSTANCE
15667 lock_name = self.op.name
15668 elif self.op.kind == constants.TAG_NODEGROUP:
15669 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15670 lock_level = locking.LEVEL_NODEGROUP
15671 lock_name = self.group_uuid
15672 elif self.op.kind == constants.TAG_NETWORK:
15673 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15674 lock_level = locking.LEVEL_NETWORK
15675 lock_name = self.network_uuid
15676 else:
15677 lock_level = None
15678 lock_name = None
15680 if lock_level and getattr(self.op, "use_locking", True):
15681 self.needed_locks[lock_level] = lock_name
15683 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15684 # not possible to acquire the BGL based on opcode parameters)
15686 def CheckPrereq(self):
15687 """Check prerequisites.
15690 if self.op.kind == constants.TAG_CLUSTER:
15691 self.target = self.cfg.GetClusterInfo()
15692 elif self.op.kind == constants.TAG_NODE:
15693 self.target = self.cfg.GetNodeInfo(self.op.name)
15694 elif self.op.kind == constants.TAG_INSTANCE:
15695 self.target = self.cfg.GetInstanceInfo(self.op.name)
15696 elif self.op.kind == constants.TAG_NODEGROUP:
15697 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15698 elif self.op.kind == constants.TAG_NETWORK:
15699 self.target = self.cfg.GetNetwork(self.network_uuid)
15700 else:
15701 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15702 str(self.op.kind), errors.ECODE_INVAL)
15705 class LUTagsGet(TagsLU):
15706 """Returns the tags of a given object.
15711 def ExpandNames(self):
15712 TagsLU.ExpandNames(self)
15714 # Share locks as this is only a read operation
15715 self.share_locks = _ShareAll()
15717 def Exec(self, feedback_fn):
15718 """Returns the tag list.
15721 return list(self.target.GetTags())
15724 class LUTagsSearch(NoHooksLU):
15725 """Searches the tags for a given pattern.
15730 def ExpandNames(self):
15731 self.needed_locks = {}
15733 def CheckPrereq(self):
15734 """Check prerequisites.
15736 This checks the pattern passed for validity by compiling it.
15738 """
15739 try:
15740 self.re = re.compile(self.op.pattern)
15741 except re.error, err:
15742 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15743 (self.op.pattern, err), errors.ECODE_INVAL)
15745 def Exec(self, feedback_fn):
15746 """Returns the tag list.
15748 """
15749 cfg = self.cfg
15750 tgts = [("/cluster", cfg.GetClusterInfo())]
15751 ilist = cfg.GetAllInstancesInfo().values()
15752 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15753 nlist = cfg.GetAllNodesInfo().values()
15754 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15755 tgts.extend(("/nodegroup/%s" % n.name, n)
15756 for n in cfg.GetAllNodeGroupsInfo().values())
15757 results = []
15758 for path, target in tgts:
15759 for tag in target.GetTags():
15760 if self.re.search(tag):
15761 results.append((path, tag))
15763 return results
15765 class LUTagsSet(TagsLU):
15766 """Sets a tag on a given object.
15771 def CheckPrereq(self):
15772 """Check prerequisites.
15774 This checks the type and length of the tag name and value.
15777 TagsLU.CheckPrereq(self)
15778 for tag in self.op.tags:
15779 objects.TaggableObject.ValidateTag(tag)
15781 def Exec(self, feedback_fn):
15782 """Sets the tag.
15784 """
15785 try:
15786 for tag in self.op.tags:
15787 self.target.AddTag(tag)
15788 except errors.TagError, err:
15789 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15790 self.cfg.Update(self.target, feedback_fn)
15793 class LUTagsDel(TagsLU):
15794 """Delete a list of tags from a given object.
15799 def CheckPrereq(self):
15800 """Check prerequisites.
15802 This checks that we have the given tag.
15805 TagsLU.CheckPrereq(self)
15806 for tag in self.op.tags:
15807 objects.TaggableObject.ValidateTag(tag)
15808 del_tags = frozenset(self.op.tags)
15809 cur_tags = self.target.GetTags()
15811 diff_tags = del_tags - cur_tags
15812 if diff_tags:
15813 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15814 raise errors.OpPrereqError("Tag(s) %s not found" %
15815 (utils.CommaJoin(diff_names), ),
15816 errors.ECODE_NOENT)
15818 def Exec(self, feedback_fn):
15819 """Remove the tag from the object.
15822 for tag in self.op.tags:
15823 self.target.RemoveTag(tag)
15824 self.cfg.Update(self.target, feedback_fn)
15827 class LUTestDelay(NoHooksLU):
15828 """Sleep for a specified amount of time.
15830 This LU sleeps on the master and/or nodes for a specified amount of
15831 time.
15836 def ExpandNames(self):
15837 """Expand names and set required locks.
15839 This expands the node list, if any.
15842 self.needed_locks = {}
15843 if self.op.on_nodes:
15844 # _GetWantedNodes can be used here, but is not always appropriate to use
15845 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15846 # more information.
15847 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15848 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15850 def _TestDelay(self):
15851 """Do the actual sleep.
15854 if self.op.on_master:
15855 if not utils.TestDelay(self.op.duration):
15856 raise errors.OpExecError("Error during master delay test")
15857 if self.op.on_nodes:
15858 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15859 for node, node_result in result.items():
15860 node_result.Raise("Failure during rpc call to node %s" % node)
15862 def Exec(self, feedback_fn):
15863 """Execute the test delay opcode, with the wanted repetitions.
15866 if self.op.repeat == 0:
15867 self._TestDelay()
15868 else:
15869 top_value = self.op.repeat - 1
15870 for i in range(self.op.repeat):
15871 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15872 self._TestDelay()
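# Note that repeat == 0 still performs a single delay, while repeat == n
# runs the delay n times, logging each iteration.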
15875 class LURestrictedCommand(NoHooksLU):
15876 """Logical unit for executing restricted commands.
15881 def ExpandNames(self):
15882 if self.op.nodes:
15883 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15885 self.needed_locks = {
15886 locking.LEVEL_NODE: self.op.nodes,
15888 self.share_locks = {
15889 locking.LEVEL_NODE: not self.op.use_locking,
15892 def CheckPrereq(self):
15893 """Check prerequisites.
15897 def Exec(self, feedback_fn):
15898 """Execute restricted command and return output.
15901 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15903 # Check if correct locks are held
15904 assert set(self.op.nodes).issubset(owned_nodes)
15906 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15908 result = []
15910 for node_name in self.op.nodes:
15911 nres = rpcres[node_name]
15912 if nres.fail_msg:
15913 msg = ("Command '%s' on node '%s' failed: %s" %
15914 (self.op.command, node_name, nres.fail_msg))
15915 result.append((False, msg))
15916 else:
15917 result.append((True, nres.payload))
15919 return result
15922 class LUTestJqueue(NoHooksLU):
15923 """Utility LU to test some aspects of the job queue.
15928 # Must be lower than default timeout for WaitForJobChange to see whether it
15929 # notices changed jobs
15930 _CLIENT_CONNECT_TIMEOUT = 20.0
15931 _CLIENT_CONFIRM_TIMEOUT = 60.0
15933 @classmethod
15934 def _NotifyUsingSocket(cls, cb, errcls):
15935 """Opens a Unix socket and waits for another program to connect.
15937 @type cb: callable
15938 @param cb: Callback to send socket name to client
15939 @type errcls: class
15940 @param errcls: Exception class to use for errors
15942 """
15943 # Using a temporary directory as there's no easy way to create temporary
15944 # sockets without writing a custom loop around tempfile.mktemp and
15945 # socket.socket
15946 tmpdir = tempfile.mkdtemp()
15947 try:
15948 tmpsock = utils.PathJoin(tmpdir, "sock")
15950 logging.debug("Creating temporary socket at %s", tmpsock)
15951 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15952 try:
15953 sock.bind(tmpsock)
15954 sock.listen(1)
15956 # Send details to client
15957 cb(tmpsock)
15959 # Wait for client to connect before continuing
15960 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15961 try:
15962 (conn, _) = sock.accept()
15963 except socket.error, err:
15964 raise errcls("Client didn't connect in time (%s)" % err)
15965 finally:
15966 sock.close()
15967 finally:
15968 # Remove as soon as client is connected
15969 shutil.rmtree(tmpdir)
15971 # Wait for client to close
15972 try:
15973 try:
15974 # pylint: disable=E1101
15975 # Instance of '_socketobject' has no ... member
15976 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15977 conn.recv(1)
15978 except socket.error, err:
15979 raise errcls("Client failed to confirm notification (%s)" % err)
15980 finally:
15981 conn.close()
15983 def _SendNotification(self, test, arg, sockname):
15984 """Sends a notification to the client.
15986 @type test: string
15987 @param test: Test name
15988 @param arg: Test argument (depends on test)
15989 @type sockname: string
15990 @param sockname: Socket path
15993 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15995 def _Notify(self, prereq, test, arg):
15996 """Notifies the client of a test.
15999 @param prereq: Whether this is a prereq-phase test
16001 @param test: Test name
16002 @param arg: Test argument (depends on test)
16004 """
16005 if prereq:
16006 errcls = errors.OpPrereqError
16007 else:
16008 errcls = errors.OpExecError
16010 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16011 test, arg),
16012 errcls)
16014 def CheckArguments(self):
16015 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16016 self.expandnames_calls = 0
16018 def ExpandNames(self):
16019 checkargs_calls = getattr(self, "checkargs_calls", 0)
16020 if checkargs_calls < 1:
16021 raise errors.ProgrammerError("CheckArguments was not called")
16023 self.expandnames_calls += 1
16025 if self.op.notify_waitlock:
16026 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16028 self.LogInfo("Expanding names")
16030 # Get lock on master node (just to get a lock, not for a particular reason)
16031 self.needed_locks = {
16032 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16035 def Exec(self, feedback_fn):
16036 if self.expandnames_calls < 1:
16037 raise errors.ProgrammerError("ExpandNames was not called")
16039 if self.op.notify_exec:
16040 self._Notify(False, constants.JQT_EXEC, None)
16042 self.LogInfo("Executing")
16044 if self.op.log_messages:
16045 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16046 for idx, msg in enumerate(self.op.log_messages):
16047 self.LogInfo("Sending log message %s", idx + 1)
16048 feedback_fn(constants.JQT_MSGPREFIX + msg)
16049 # Report how many test messages have been sent
16050 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16052 if self.op.fail:
16053 raise errors.OpExecError("Opcode failure was requested")
16055 return True
16058 class LUTestAllocator(NoHooksLU):
16059 """Run allocator tests.
16061 This LU runs the allocator tests
16064 def CheckPrereq(self):
16065 """Check prerequisites.
16067 This checks the opcode parameters depending on the direction and mode test.
16070 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16071 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16072 for attr in ["memory", "disks", "disk_template",
16073 "os", "tags", "nics", "vcpus"]:
16074 if not hasattr(self.op, attr):
16075 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16076 attr, errors.ECODE_INVAL)
16077 iname = self.cfg.ExpandInstanceName(self.op.name)
16078 if iname is not None:
16079 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16080 iname, errors.ECODE_EXISTS)
16081 if not isinstance(self.op.nics, list):
16082 raise errors.OpPrereqError("Invalid parameter 'nics'",
16083 errors.ECODE_INVAL)
16084 if not isinstance(self.op.disks, list):
16085 raise errors.OpPrereqError("Invalid parameter 'disks'",
16086 errors.ECODE_INVAL)
16087 for row in self.op.disks:
16088 if (not isinstance(row, dict) or
16089 constants.IDISK_SIZE not in row or
16090 not isinstance(row[constants.IDISK_SIZE], int) or
16091 constants.IDISK_MODE not in row or
16092 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16093 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16094 " parameter", errors.ECODE_INVAL)
16095 if self.op.hypervisor is None:
16096 self.op.hypervisor = self.cfg.GetHypervisorType()
16097 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16098 fname = _ExpandInstanceName(self.cfg, self.op.name)
16099 self.op.name = fname
16100 self.relocate_from = \
16101 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16102 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16103 constants.IALLOCATOR_MODE_NODE_EVAC):
16104 if not self.op.instances:
16105 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16106 self.op.instances = _GetWantedInstances(self, self.op.instances)
16107 else:
16108 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16109 self.op.mode, errors.ECODE_INVAL)
16111 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16112 if self.op.iallocator is None:
16113 raise errors.OpPrereqError("Missing allocator name",
16114 errors.ECODE_INVAL)
16115 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16116 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16117 self.op.direction, errors.ECODE_INVAL)
16119 def Exec(self, feedback_fn):
16120 """Run the allocator test.
16123 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16124 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16125 memory=self.op.memory,
16126 disks=self.op.disks,
16127 disk_template=self.op.disk_template,
16128 os=self.op.os,
16129 tags=self.op.tags,
16130 nics=self.op.nics,
16131 vcpus=self.op.vcpus,
16132 spindle_use=self.op.spindle_use,
16133 hypervisor=self.op.hypervisor)
16134 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16135 req = iallocator.IAReqRelocate(name=self.op.name,
16136 relocate_from=list(self.relocate_from))
16137 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16138 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16139 target_groups=self.op.target_groups)
16140 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16141 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16142 evac_mode=self.op.evac_mode)
16143 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16144 disk_template = self.op.disk_template
16145 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16146 memory=self.op.memory,
16147 disks=self.op.disks,
16148 disk_template=disk_template,
16149 os=self.op.os,
16150 tags=self.op.tags,
16151 nics=self.op.nics,
16152 vcpus=self.op.vcpus,
16153 spindle_use=self.op.spindle_use,
16154 hypervisor=self.op.hypervisor)
16155 for idx in range(self.op.count)]
16156 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16157 else:
16158 raise errors.ProgrammerError("Uncaught mode %s in"
16159 " LUTestAllocator.Exec", self.op.mode)
16161 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
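# Direction "in" only returns the allocator input for inspection; "out"
# actually invokes the allocator, with validation disabled since this is a
# test opcode.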
16162 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16163 result = ial.in_text
16164 else:
16165 ial.Run(self.op.iallocator, validate=False)
16166 result = ial.out_text
16168 return result
16170 class LUNetworkAdd(LogicalUnit):
16171 """Logical unit for creating networks.
16174 HPATH = "network-add"
16175 HTYPE = constants.HTYPE_NETWORK
16178 def BuildHooksNodes(self):
16179 """Build hooks nodes.
16182 mn = self.cfg.GetMasterNode()
16183 return ([mn], [mn])
16185 def CheckArguments(self):
16186 if self.op.mac_prefix:
16187 self.op.mac_prefix = \
16188 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16190 def ExpandNames(self):
16191 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16193 if self.op.conflicts_check:
16194 self.share_locks[locking.LEVEL_NODE] = 1
16195 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16196 self.needed_locks = {
16197 locking.LEVEL_NODE: locking.ALL_SET,
16198 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16199 }
16200 else:
16201 self.needed_locks = {}
16203 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
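# As with new node groups above, the UUID is generated early so that the
# lock for the not-yet-existing network can be acquired in ExpandNames.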
16205 def CheckPrereq(self):
16206 if self.op.network is None:
16207 raise errors.OpPrereqError("Network must be given",
16208 errors.ECODE_INVAL)
16210 uuid = self.cfg.LookupNetwork(self.op.network_name)
16212 if uuid:
16213 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16214 self.op.network_name), errors.ECODE_EXISTS)
16216 # Check tag validity
16217 for tag in self.op.tags:
16218 objects.TaggableObject.ValidateTag(tag)
16220 def BuildHooksEnv(self):
16221 """Build hooks env.
16223 """
16224 args = {
16225 "name": self.op.network_name,
16226 "subnet": self.op.network,
16227 "gateway": self.op.gateway,
16228 "network6": self.op.network6,
16229 "gateway6": self.op.gateway6,
16230 "mac_prefix": self.op.mac_prefix,
16231 "tags": self.op.tags,
16232 }
16233 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16235 def Exec(self, feedback_fn):
16236 """Add the ip pool to the cluster.
16239 nobj = objects.Network(name=self.op.network_name,
16240 network=self.op.network,
16241 gateway=self.op.gateway,
16242 network6=self.op.network6,
16243 gateway6=self.op.gateway6,
16244 mac_prefix=self.op.mac_prefix,
16245 uuid=self.network_uuid,
16246 family=constants.IP4_VERSION)
16247 # Initialize the associated address pool
16248 try:
16249 pool = network.AddressPool.InitializeNetwork(nobj)
16250 except errors.AddressPoolError, e:
16251 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16253 # Check if we need to reserve the nodes and the cluster master IP
16254 # These may not be allocated to any instances in routed mode, as
16255 # they wouldn't function anyway.
16256 if self.op.conflicts_check:
16257 for node in self.cfg.GetAllNodesInfo().values():
16258 for ip in [node.primary_ip, node.secondary_ip]:
16259 try:
16260 if pool.Contains(ip):
16261 pool.Reserve(ip)
16262 self.LogInfo("Reserved IP address of node '%s' (%s)",
16263 node.name, ip)
16264 except errors.AddressPoolError:
16265 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16266 node.name, ip)
16268 master_ip = self.cfg.GetClusterInfo().master_ip
16269 try:
16270 if pool.Contains(master_ip):
16271 pool.Reserve(master_ip)
16272 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16273 except errors.AddressPoolError:
16274 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16275 master_ip)
16277 if self.op.add_reserved_ips:
16278 for ip in self.op.add_reserved_ips:
16279 try:
16280 pool.Reserve(ip, external=True)
16281 except errors.AddressPoolError, e:
16282 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16284 if self.op.tags:
16285 for tag in self.op.tags:
16286 nobj.AddTag(tag)
16288 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16289 del self.remove_locks[locking.LEVEL_NETWORK]
16292 class LUNetworkRemove(LogicalUnit):
16293 HPATH = "network-remove"
16294 HTYPE = constants.HTYPE_NETWORK
16297 def ExpandNames(self):
16298 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16300 if not self.network_uuid:
16301 raise errors.OpPrereqError(("Network '%s' not found" %
16302 self.op.network_name), errors.ECODE_NOENT)
16304 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16305 self.needed_locks = {
16306 locking.LEVEL_NETWORK: [self.network_uuid],
16307 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16310 def CheckPrereq(self):
16311 """Check prerequisites.
16313 This checks that the given network name exists as a network and that it
16314 is not connected to any node group.
16318 # Verify that the network is not connected.
16319 node_groups = [group.name
16320 for group in self.cfg.GetAllNodeGroupsInfo().values()
16321 if self.network_uuid in group.networks]
16323 if node_groups:
16324 self.LogWarning("Network '%s' is connected to the following"
16325 " node groups: %s" %
16326 (self.op.network_name,
16327 utils.CommaJoin(utils.NiceSort(node_groups))))
16328 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16330 def BuildHooksEnv(self):
16331 """Build hooks env.
16333 """
16334 return {
16335 "NETWORK_NAME": self.op.network_name,
16336 }
16338 def BuildHooksNodes(self):
16339 """Build hooks nodes.
16342 mn = self.cfg.GetMasterNode()
16343 return ([mn], [mn])
16345 def Exec(self, feedback_fn):
16346 """Remove the network.
16348 """
16349 try:
16350 self.cfg.RemoveNetwork(self.network_uuid)
16351 except errors.ConfigurationError:
16352 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16353 (self.op.network_name, self.network_uuid))
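
# Usage sketch: removal is typically requested through an OpNetworkRemove
# opcode (the exact opcode fields are defined in the opcodes module, not
# here); CheckPrereq above guarantees the network is disconnected from all
# node groups before RemoveNetwork touches the configuration.
#
#   op = opcodes.OpNetworkRemove(network_name="example-net")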


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved IPs"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve the gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
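
# Illustrative sketch (hypothetical addresses): changing the gateway in Exec
# above first reserves the new address externally, then releases the old
# one, so both stay reserved until the swap is complete:
#
#   pool.Reserve("192.0.2.254", external=True)   # new gateway
#   pool.Release("192.0.2.1", external=True)     # previous gateway
#   net.gateway = "192.0.2.254"
#   cfg.Update(net, feedback_fn)                 # persist the change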


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])
            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
            break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
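
# Example of the dictionary returned by _GetStats() for a hypothetical pool
# (the keys are the ones built above; the "map" string is whatever
# AddressPool.GetMap() produces, shown here under the assumption that
# reserved slots are marked "X" and free ones "."):
#
#   {
#     "free_count": 11,
#     "reserved_count": 3,
#     "map": "XX..........X.",
#     "external_reservations": "192.0.2.1",
#   }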


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
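
# Usage sketch: the opcode fields consumed by CheckArguments above are
# names, output_fields and use_locking, so a query for all networks can be
# expressed as follows (field names beyond "name" are assumed to be defined
# in query.NETWORK_FIELDS):
#
#   op = opcodes.OpNetworkQuery(names=[], use_locking=False,
#                               output_fields=["name", "free_count"])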


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs to be verified once the
      # group lock has been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
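
# Illustrative sketch: the per-group mapping written by Exec above is just
# the validated NIC parameter dictionary, e.g. for bridged mode over a
# hypothetical bridge "br0":
#
#   group.networks[network_uuid] = {
#     constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#     constants.NIC_LINK: "br0",
#     }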


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found; remove or"
                               " modify the corresponding network interfaces",
                               errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
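
# Example: for details = [(0, "192.0.2.10"), (2, "192.0.2.12")] the helper
# returns "nic0/192.0.2.10, nic2/192.0.2.12", which _NetworkConflictCheck
# then prefixes with the instance name in its warning message.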


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs to be verified once the
      # group lock has been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False

    if self.op.conflicts_check:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                            "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)
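
# Note the asymmetry between the two conflict checks: connecting uses
# pool.Contains(nic.ip) to catch any NIC whose address would collide with
# the new pool, while disconnecting only flags NICs explicitly attached to
# this network (nic.network == network_name), since those would be left
# referencing a no-longer-connected network.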


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
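
# Example: _GetQueryImplementation(constants.QR_NETWORK) returns the
# _NetworkQuery class defined above, while an unknown resource name is
# reported as OpPrereqError/ECODE_INVAL instead of leaking a KeyError to
# the caller.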


def _CheckForConflictingIp(lu, ip, node):
  """In case of a conflicting IP address, raise an error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("IP address '%s' conflicts with network '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
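
# Usage sketch (assumed caller pattern): NIC validation code passes a
# candidate address and the node whose group determines which networks
# apply; on success the helper mirrors the "no match" result shape of
# cfg.CheckIPInNodeGroup() by returning (None, None).
#
#   (net, _) = _CheckForConflictingIp(self, "192.0.2.10", node_name)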