4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
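# Illustrative sketch (not from the original module): an LU's Exec could hand
# follow-up work to the job queue like this, assuming a hypothetical
# affected_instances list; any keyword arguments become additional entries in
# the opcode result.
#
#   return ResultWithJobs([[opcodes.OpInstanceStartup(instance_name=name)]
#                          for name in affected_instances],
#                         other_result="value kept alongside the job IDs")
#
# mcpu._ProcessResult then submits each inner list as one job and records the
# resulting job IDs in the opcode result.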
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left purely as a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same time.
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
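# A minimal sketch of the conventions described above (hypothetical LU,
# example names only): one instance lock is requested up front, node locks
# are declared empty here and computed later in DeclareLocks, and node locks
# are shared rather than exclusive.
#
#   def ExpandNames(self):
#     self.needed_locks = {
#       locking.LEVEL_INSTANCE: ["inst1.example.com"],
#       locking.LEVEL_NODE: [],
#       }
#     self.share_locks[locking.LEVEL_NODE] = 1
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE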
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. If no nodes are needed, an
323 empty list must be returned (not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused-argument and
350 # could-be-a-function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
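# Typical use (sketch): an instance-level LU can implement ExpandNames as
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#
# which leaves self.op.instance_name fully expanded and requests the matching
# instance lock.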
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
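# Sketch of the usual ExpandNames/DeclareLocks pairing for this helper
# (hypothetical LU):
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes(primary_only=True)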
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node name
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
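# Example of the merge semantics above (key names and values are made up):
# with use_default=True, VALUE_DEFAULT entries are dropped from the result
# instead of being stored:
#
#   _GetUpdatedParams({"vcpus": 2, "memory": 512},
#                     {"memory": constants.VALUE_DEFAULT,
#                      "auto_balance": True})
#   -> {"vcpus": 2, "auto_balance": True}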
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of a instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
831 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
833 use_default=use_default)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster'" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
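# Usage sketch (called from inside an LU; pnode/snode are hypothetical
# variables): once an LU knows which nodes it really touches, it can drop the
# rest of its node locks either by naming what to release or what to keep
# (the two parameters are mutually exclusive):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[pnode, snode])
#   _ReleaseLocks(self, locking.LEVEL_NODE)  # release everything at the level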
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024 """Make sure that none of the given paramters is global.
1026 If a global parameter is found, an L{errors.OpPrereqError} exception is
1027 raised. This is used to avoid setting global parameters for individual nodes.
1029 @type params: dictionary
1030 @param params: Parameters to check
1031 @type glob_pars: dictionary
1032 @param glob_pars: Forbidden parameters
1034 @param kind: Kind of parameters (e.g. "node")
1035 @type bad_levels: string
1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1038 @type good_levels: string
1039 @param good_levels: Level(s) at which the parameters are allowed (e.g.
1043 used_globals = glob_pars.intersection(params)
1045 msg = ("The following %s parameters are global and cannot"
1046 " be customized at %s level, please modify them at"
1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1052 def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online.
1055 @param lu: the LU on behalf of which we make the check
1056 @param node: the node to check
1057 @param msg: if passed, should be a message to replace the default one
1058 @raise errors.OpPrereqError: if the node is offline
1062 msg = "Can't use offline node"
1063 if lu.cfg.GetNodeInfo(node).offline:
1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1067 def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained.
1070 @param lu: the LU on behalf of which we make the check
1071 @param node: the node to check
1072 @raise errors.OpPrereqError: if the node is drained
1075 if lu.cfg.GetNodeInfo(node).drained:
1076 raise errors.OpPrereqError("Can't use drained node %s" % node,
1080 def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable.
1083 @param lu: the LU on behalf of which we make the check
1084 @param node: the node to check
1085 @raise errors.OpPrereqError: if the node is not vm capable
1088 if not lu.cfg.GetNodeInfo(node).vm_capable:
1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1093 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS.
1096 @param lu: the LU on behalf of which we make the check
1097 @param node: the node to check
1098 @param os_name: the OS to query about
1099 @param force_variant: whether to ignore variant errors
1100 @raise errors.OpPrereqError: if the node does not support the OS
1103 result = lu.rpc.call_os_get(node, os_name)
1104 result.Raise("OS '%s' not in supported OS list for node %s" %
1106 prereq=True, ecode=errors.ECODE_INVAL)
1107 if not force_variant:
1108 _CheckOSVariant(result.payload, os_name)
1111 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip.
1114 @type lu: L{LogicalUnit}
1115 @param lu: the LU on behalf of which we make the check
1117 @param node: the node to check
1118 @type secondary_ip: string
1119 @param secondary_ip: the ip to check
1120 @type prereq: boolean
1121 @param prereq: whether to throw a prerequisite or an execute error
1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1127 result.Raise("Failure checking secondary ip on node %s" % node,
1128 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1129 if not result.payload:
1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1131 " please fix and re-run this command" % secondary_ip)
1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1135 raise errors.OpExecError(msg)
1138 def _CheckNodePVs(nresult, exclusive_storage):
1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1143 if pvlist_dict is None:
1144 return (["Can't get PV list from node"], None)
1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1147 # check that ':' is not present in PV names, since it's a
1148 # special character for lvcreate (denotes the range of PEs to
1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1153 (pv.name, pv.vg_name))
1155 if exclusive_storage:
1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1157 errlist.extend(errmsgs)
1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1160 for (pvname, lvlist) in shared_pvs:
1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1162 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1163 (pvname, utils.CommaJoin(lvlist)))
1164 return (errlist, es_pvinfo)
1167 def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret.
1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1175 def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states.
1178 @param lu: the LU on behalf of which we make the check
1179 @param instance: the instance to check
1180 @param msg: if passed, should be a message to replace the default one
1181 @raise errors.OpPrereqError: if the instance is not in the required state
1185 msg = ("can't use instance from outside %s states" %
1186 utils.CommaJoin(req_states))
1187 if instance.admin_state not in req_states:
1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1189 (instance.name, instance.admin_state, msg),
1192 if constants.ADMINST_UP not in req_states:
1193 pnode = instance.primary_node
1194 if not lu.cfg.GetNodeInfo(pnode).offline:
1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1196 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1197 prereq=True, ecode=errors.ECODE_ENVIRON)
1198 if instance.name in ins_l.payload:
1199 raise errors.OpPrereqError("Instance %s is running, %s" %
1200 (instance.name, msg), errors.ECODE_STATE)
1202 lu.LogWarning("Primary node offline, ignoring check that instance"
1206 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range.
1209 @param name: name of the parameter for which we perform the check
1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1212 @param ipolicy: dictionary containing min, max and std values
1213 @param value: actual value that we want to use
1214 @return: None or element not meeting the criteria
1218 if value in [None, constants.VALUE_AUTO]:
1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1222 if value > max_v or min_v > value:
1224 fqn = "%s/%s" % (name, qualifier)
1227 return ("%s value %s is not in range [%s, %s]" %
1228 (fqn, value, min_v, max_v))
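# Worked example (made-up numbers): with an ipolicy whose min/max entries give
# a disk-size range of [1024, 10240],
#   _ComputeMinMaxSpec(constants.ISPEC_DISK_SIZE, "1", ipolicy, 512)
# returns a message along the lines of
#   "disk-size/1 value 512 is not in range [1024, 10240]"
# while a value of 2048 returns None (no violation).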
1232 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1233 nic_count, disk_sizes, spindle_use,
1234 _compute_fn=_ComputeMinMaxSpec):
1235 """Verifies ipolicy against provided specs.
1238 @param ipolicy: The ipolicy
1240 @param mem_size: The memory size
1241 @type cpu_count: int
1242 @param cpu_count: Used cpu cores
1243 @type disk_count: int
1244 @param disk_count: Number of disks used
1245 @type nic_count: int
1246 @param nic_count: Number of nics used
1247 @type disk_sizes: list of ints
1248 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1249 @type spindle_use: int
1250 @param spindle_use: The number of spindles this instance uses
1251 @param _compute_fn: The compute function (unittest only)
1252 @return: A list of violations, or an empty list if no violations are found
1255 assert disk_count == len(disk_sizes)
1258 (constants.ISPEC_MEM_SIZE, "", mem_size),
1259 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1260 (constants.ISPEC_DISK_COUNT, "", disk_count),
1261 (constants.ISPEC_NIC_COUNT, "", nic_count),
1262 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1263 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1264 for idx, d in enumerate(disk_sizes)]
1267 (_compute_fn(name, qualifier, ipolicy, value)
1268 for (name, qualifier, value) in test_settings))
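# Usage sketch (made-up values): verifying a two-disk instance shape directly
# against an ipolicy; the result is empty when everything is within the
# policy's min/max ranges:
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, mem_size=512,
#                                             cpu_count=2, disk_count=2,
#                                             nic_count=1,
#                                             disk_sizes=[1024, 2048],
#                                             spindle_use=1)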
1271 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1272 _compute_fn=_ComputeIPolicySpecViolation):
1273 """Compute if instance meets the specs of ipolicy.
1276 @param ipolicy: The ipolicy to verify against
1277 @type instance: L{objects.Instance}
1278 @param instance: The instance to verify
1279 @param _compute_fn: The function to verify ipolicy (unittest only)
1280 @see: L{_ComputeIPolicySpecViolation}
1283 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1284 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1285 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1286 disk_count = len(instance.disks)
1287 disk_sizes = [disk.size for disk in instance.disks]
1288 nic_count = len(instance.nics)
1290 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1291 disk_sizes, spindle_use)
1294 def _ComputeIPolicyInstanceSpecViolation(
1295 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1296 """Compute if instance specs meets the specs of ipolicy.
1299 @param ipolicy: The ipolicy to verify against
1300 @type instance_spec: dict
1301 @param instance_spec: The instance spec to verify
1302 @param _compute_fn: The function to verify ipolicy (unittest only)
1303 @see: L{_ComputeIPolicySpecViolation}
1306 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1307 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1308 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1309 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1310 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1311 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1313 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1314 disk_sizes, spindle_use)
1317 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1319 _compute_fn=_ComputeIPolicyInstanceViolation):
1320 """Compute if instance meets the specs of the new target group.
1322 @param ipolicy: The ipolicy to verify
1323 @param instance: The instance object to verify
1324 @param current_group: The current group of the instance
1325 @param target_group: The new group of the instance
1326 @param _compute_fn: The function to verify ipolicy (unittest only)
1327 @see: L{_ComputeIPolicySpecViolation}
1330 if current_group == target_group:
1333 return _compute_fn(ipolicy, instance)
1336 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1337 _compute_fn=_ComputeIPolicyNodeViolation):
1338 """Checks that the target node is correct in terms of instance policy.
1340 @param ipolicy: The ipolicy to verify
1341 @param instance: The instance object to verify
1342 @param node: The new node to relocate
1343 @param ignore: Ignore violations of the ipolicy
1344 @param _compute_fn: The function to verify ipolicy (unittest only)
1345 @see: L{_ComputeIPolicySpecViolation}
1348 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1349 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1352 msg = ("Instance does not meet target node group's (%s) instance"
1353 " policy: %s") % (node.group, utils.CommaJoin(res))
1357 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1360 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1361 """Computes a set of any instances that would violate the new ipolicy.
1363 @param old_ipolicy: The current (still in-place) ipolicy
1364 @param new_ipolicy: The new (to become) ipolicy
1365 @param instances: List of instances to verify
1366 @return: A set of instances which violate the new ipolicy but
1370 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1371 _ComputeViolatingInstances(old_ipolicy, instances))
1374 def _ExpandItemName(fn, name, kind):
1375 """Expand an item name.
1377 @param fn: the function to use for expansion
1378 @param name: requested item name
1379 @param kind: text description ('Node' or 'Instance')
1380 @return: the resolved (full) name
1381 @raise errors.OpPrereqError: if the item is not found
1384 full_name = fn(name)
1385 if full_name is None:
1386 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1391 def _ExpandNodeName(cfg, name):
1392 """Wrapper over L{_ExpandItemName} for nodes."""
1393 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1396 def _ExpandInstanceName(cfg, name):
1397 """Wrapper over L{_ExpandItemName} for instance."""
1398 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
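# Behaviour sketch (example names, called from inside an LU): both wrappers
# return the fully expanded name, or raise OpPrereqError when the
# configuration cannot resolve it:
#
#   _ExpandNodeName(self.cfg, "node1")     # -> "node1.example.com"
#   _ExpandInstanceName(self.cfg, "bogus") # raises errors.OpPrereqError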
1401 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1403 """Builds network related env variables for hooks
1405 This builds the hook environment from individual variables.
1408 @param name: the name of the network
1409 @type subnet: string
1410 @param subnet: the ipv4 subnet
1411 @type gateway: string
1412 @param gateway: the ipv4 gateway
1413 @type network6: string
1414 @param network6: the ipv6 subnet
1415 @type gateway6: string
1416 @param gateway6: the ipv6 gateway
1417 @type mac_prefix: string
1418 @param mac_prefix: the mac_prefix
1420 @param tags: the tags of the network
1425 env["NETWORK_NAME"] = name
1427 env["NETWORK_SUBNET"] = subnet
1429 env["NETWORK_GATEWAY"] = gateway
1431 env["NETWORK_SUBNET6"] = network6
1433 env["NETWORK_GATEWAY6"] = gateway6
1435 env["NETWORK_MAC_PREFIX"] = mac_prefix
1437 env["NETWORK_TAGS"] = " ".join(tags)
1442 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1443 minmem, maxmem, vcpus, nics, disk_template, disks,
1444 bep, hvp, hypervisor_name, tags):
1445 """Builds instance related env variables for hooks
1447 This builds the hook environment from individual variables.
1450 @param name: the name of the instance
1451 @type primary_node: string
1452 @param primary_node: the name of the instance's primary node
1453 @type secondary_nodes: list
1454 @param secondary_nodes: list of secondary nodes as strings
1455 @type os_type: string
1456 @param os_type: the name of the instance's OS
1457 @type status: string
1458 @param status: the desired status of the instance
1459 @type minmem: string
1460 @param minmem: the minimum memory size of the instance
1461 @type maxmem: string
1462 @param maxmem: the maximum memory size of the instance
1464 @param vcpus: the count of VCPUs the instance has
1466 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1467 the NICs the instance has
1468 @type disk_template: string
1469 @param disk_template: the disk template of the instance
1471 @param disks: the list of (size, mode) pairs
1473 @param bep: the backend parameters for the instance
1475 @param hvp: the hypervisor parameters for the instance
1476 @type hypervisor_name: string
1477 @param hypervisor_name: the hypervisor for the instance
1479 @param tags: list of instance tags as strings
1481 @return: the hook environment for this instance
1486 "INSTANCE_NAME": name,
1487 "INSTANCE_PRIMARY": primary_node,
1488 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1489 "INSTANCE_OS_TYPE": os_type,
1490 "INSTANCE_STATUS": status,
1491 "INSTANCE_MINMEM": minmem,
1492 "INSTANCE_MAXMEM": maxmem,
1493 # TODO(2.7) remove deprecated "memory" value
1494 "INSTANCE_MEMORY": maxmem,
1495 "INSTANCE_VCPUS": vcpus,
1496 "INSTANCE_DISK_TEMPLATE": disk_template,
1497 "INSTANCE_HYPERVISOR": hypervisor_name,
1500 nic_count = len(nics)
1501 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1504 env["INSTANCE_NIC%d_IP" % idx] = ip
1505 env["INSTANCE_NIC%d_MAC" % idx] = mac
1506 env["INSTANCE_NIC%d_MODE" % idx] = mode
1507 env["INSTANCE_NIC%d_LINK" % idx] = link
1509 nobj = objects.Network.FromDict(netinfo)
1510 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1512 # FIXME: broken network reference: the instance NIC specifies a
1513 # network, but the relevant network entry was not in the config. This
1514 # should be made impossible.
1515 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1516 if mode == constants.NIC_MODE_BRIDGED:
1517 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1521 env["INSTANCE_NIC_COUNT"] = nic_count
1524 disk_count = len(disks)
1525 for idx, (size, mode) in enumerate(disks):
1526 env["INSTANCE_DISK%d_SIZE" % idx] = size
1527 env["INSTANCE_DISK%d_MODE" % idx] = mode
1531 env["INSTANCE_DISK_COUNT"] = disk_count
1536 env["INSTANCE_TAGS"] = " ".join(tags)
1538 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1539 for key, value in source.items():
1540 env["INSTANCE_%s_%s" % (kind, key)] = value
1545 def _NICToTuple(lu, nic):
1546 """Build a tupple of nic information.
1548 @type lu: L{LogicalUnit}
1549 @param lu: the logical unit on whose behalf we execute
1550 @type nic: L{objects.NIC}
1551 @param nic: nic to convert to hooks tuple
1554 cluster = lu.cfg.GetClusterInfo()
1555 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1556 mode = filled_params[constants.NIC_MODE]
1557 link = filled_params[constants.NIC_LINK]
1560 nobj = lu.cfg.GetNetwork(nic.network)
1561 netinfo = objects.Network.ToDict(nobj)
1562 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1565 def _NICListToTuple(lu, nics):
1566 """Build a list of nic information tuples.
1568 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1569 value in LUInstanceQueryData.
1571 @type lu: L{LogicalUnit}
1572 @param lu: the logical unit on whose behalf we execute
1573 @type nics: list of L{objects.NIC}
1574 @param nics: list of nics to convert to hooks tuples
1579 hooks_nics.append(_NICToTuple(lu, nic))
1583 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1584 """Builds instance related env variables for hooks from an object.
1586 @type lu: L{LogicalUnit}
1587 @param lu: the logical unit on whose behalf we execute
1588 @type instance: L{objects.Instance}
1589 @param instance: the instance for which we should build the
1591 @type override: dict
1592 @param override: dictionary with key/values that will override
1595 @return: the hook environment dictionary
1598 cluster = lu.cfg.GetClusterInfo()
1599 bep = cluster.FillBE(instance)
1600 hvp = cluster.FillHV(instance)
1602 "name": instance.name,
1603 "primary_node": instance.primary_node,
1604 "secondary_nodes": instance.secondary_nodes,
1605 "os_type": instance.os,
1606 "status": instance.admin_state,
1607 "maxmem": bep[constants.BE_MAXMEM],
1608 "minmem": bep[constants.BE_MINMEM],
1609 "vcpus": bep[constants.BE_VCPUS],
1610 "nics": _NICListToTuple(lu, instance.nics),
1611 "disk_template": instance.disk_template,
1612 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1615 "hypervisor_name": instance.hypervisor,
1616 "tags": instance.tags,
1619 args.update(override)
1620 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
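# Usage sketch (hypothetical values): selected keys can be overridden, e.g. to
# build the hook environment for a planned memory change before it is applied:
#
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={"maxmem": 4096,
#                                                 "minmem": 2048})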
1623 def _AdjustCandidatePool(lu, exceptions):
1624 """Adjust the candidate pool after node operations.
1627 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1629 lu.LogInfo("Promoted nodes to master candidate role: %s",
1630 utils.CommaJoin(node.name for node in mod_list))
1631 for name in mod_list:
1632 lu.context.ReaddNode(name)
1633 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1635 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1639 def _DecideSelfPromotion(lu, exceptions=None):
1640 """Decide whether I should promote myself as a master candidate.
1643 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1644 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1645 # the new node will increase mc_max by one, so:
1646 mc_should = min(mc_should + 1, cp_size)
1647 return mc_now < mc_should
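# Worked example: with candidate_pool_size = 10, mc_now = 7 currently promoted
# candidates and mc_should = 8 reported by GetMasterCandidateStats, adding
# this node gives mc_should = min(8 + 1, 10) = 9; since 7 < 9 the node decides
# to promote itself.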
1650 def _ComputeViolatingInstances(ipolicy, instances):
1651 """Computes a set of instances who violates given ipolicy.
1653 @param ipolicy: The ipolicy to verify
1654 @type instances: list of L{objects.Instance}
1655 @param instances: List of instances to verify
1656 @return: A frozenset of instance names violating the ipolicy
1659 return frozenset([inst.name for inst in instances
1660 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1663 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1664 """Check that the brigdes needed by a list of nics exist.
1667 cluster = lu.cfg.GetClusterInfo()
1668 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1669 brlist = [params[constants.NIC_LINK] for params in paramslist
1670 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1672 result = lu.rpc.call_bridges_exist(target_node, brlist)
1673 result.Raise("Error checking bridges on destination node '%s'" %
1674 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1677 def _CheckInstanceBridgesExist(lu, instance, node=None):
1678 """Check that the brigdes needed by an instance exist.
1682 node = instance.primary_node
1683 _CheckNicsBridgesExist(lu, instance.nics, node)
1686 def _CheckOSVariant(os_obj, name):
1687 """Check whether an OS name conforms to the os variants specification.
1689 @type os_obj: L{objects.OS}
1690 @param os_obj: OS object to check
1692 @param name: OS name passed by the user, to check for validity
1695 variant = objects.OS.GetVariant(name)
1696 if not os_obj.supported_variants:
1698 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1699 " passed)" % (os_obj.name, variant),
1703 raise errors.OpPrereqError("OS name must include a variant",
1706 if variant not in os_obj.supported_variants:
1707 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1710 def _GetNodeInstancesInner(cfg, fn):
1711 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1714 def _GetNodeInstances(cfg, node_name):
1715 """Returns a list of all primary and secondary instances on a node.
1719 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1722 def _GetNodePrimaryInstances(cfg, node_name):
1723 """Returns primary instances on a node.
1726 return _GetNodeInstancesInner(cfg,
1727 lambda inst: node_name == inst.primary_node)
1730 def _GetNodeSecondaryInstances(cfg, node_name):
1731 """Returns secondary instances on a node.
1734 return _GetNodeInstancesInner(cfg,
1735 lambda inst: node_name in inst.secondary_nodes)
1738 def _GetStorageTypeArgs(cfg, storage_type):
1739 """Returns the arguments for a storage type.
1742 # Special case for file storage
1743 if storage_type == constants.ST_FILE:
1744 # storage.FileStorage wants a list of storage directories
1745 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1750 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1753 for dev in instance.disks:
1754 cfg.SetDiskID(dev, node_name)
1756 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1758 result.Raise("Failed to get disk status from node %s" % node_name,
1759 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1761 for idx, bdev_status in enumerate(result.payload):
1762 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1768 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1769 """Check the sanity of iallocator and node arguments and use the
1770 cluster-wide iallocator if appropriate.
1772 Check that at most one of (iallocator, node) is specified. If none is
1773 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1774 then the LU's opcode's iallocator slot is filled with the cluster-wide
1777 @type iallocator_slot: string
1778 @param iallocator_slot: the name of the opcode iallocator slot
1779 @type node_slot: string
1780 @param node_slot: the name of the opcode target node slot
1783 node = getattr(lu.op, node_slot, None)
1784 ialloc = getattr(lu.op, iallocator_slot, None)
1788 if node is not None and ialloc is not None:
1789 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1791 elif ((node is None and ialloc is None) or
1792 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1793 default_iallocator = lu.cfg.GetDefaultIAllocator()
1794 if default_iallocator:
1795 setattr(lu.op, iallocator_slot, default_iallocator)
1797 raise errors.OpPrereqError("No iallocator or node given and no"
1798 " cluster-wide default iallocator found;"
1799 " please specify either an iallocator or a"
1800 " node, or set a cluster-wide default"
1801 " iallocator", errors.ECODE_INVAL)
1804 def _GetDefaultIAllocator(cfg, ialloc):
1805 """Decides on which iallocator to use.
1807 @type cfg: L{config.ConfigWriter}
1808 @param cfg: Cluster configuration object
1809 @type ialloc: string or None
1810 @param ialloc: Iallocator specified in opcode
1812 @return: Iallocator name
1816 # Use default iallocator
1817 ialloc = cfg.GetDefaultIAllocator()
1820 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1821 " opcode nor as a cluster-wide default",
1827 def _CheckHostnameSane(lu, name):
1828 """Ensures that a given hostname resolves to a 'sane' name.
1830 The given name is required to be a prefix of the resolved hostname,
1831 to prevent accidental mismatches.
1833 @param lu: the logical unit on behalf of which we're checking
1834 @param name: the name we should resolve and check
1835 @return: the resolved hostname object
1838 hostname = netutils.GetHostname(name=name)
1839 if hostname.name != name:
1840 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1841 if not utils.MatchNameComponent(name, [hostname.name]):
1842 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1843 " same as given hostname '%s'") %
1844 (hostname.name, name), errors.ECODE_INVAL)
1848 class LUClusterPostInit(LogicalUnit):
1849 """Logical unit for running hooks after cluster initialization.
1852 HPATH = "cluster-init"
1853 HTYPE = constants.HTYPE_CLUSTER
1855 def BuildHooksEnv(self):
1860 "OP_TARGET": self.cfg.GetClusterName(),
1863 def BuildHooksNodes(self):
1864 """Build hooks nodes.
1867 return ([], [self.cfg.GetMasterNode()])
1869 def Exec(self, feedback_fn):
1876 class LUClusterDestroy(LogicalUnit):
1877 """Logical unit for destroying the cluster.
1880 HPATH = "cluster-destroy"
1881 HTYPE = constants.HTYPE_CLUSTER
1883 def BuildHooksEnv(self):
1888 "OP_TARGET": self.cfg.GetClusterName(),
1891 def BuildHooksNodes(self):
1892 """Build hooks nodes.
1897 def CheckPrereq(self):
1898 """Check prerequisites.
1900 This checks whether the cluster is empty.
1902 Any errors are signaled by raising errors.OpPrereqError.
1905 master = self.cfg.GetMasterNode()
1907 nodelist = self.cfg.GetNodeList()
1908 if len(nodelist) != 1 or nodelist[0] != master:
1909 raise errors.OpPrereqError("There are still %d node(s) in"
1910 " this cluster." % (len(nodelist) - 1),
1912 instancelist = self.cfg.GetInstanceList()
1914 raise errors.OpPrereqError("There are still %d instance(s) in"
1915 " this cluster." % len(instancelist),
1918 def Exec(self, feedback_fn):
1919 """Destroys the cluster.
1922 master_params = self.cfg.GetMasterNetworkParameters()
1924 # Run post hooks on master node before it's removed
1925 _RunPostHook(self, master_params.name)
1927 ems = self.cfg.GetUseExternalMipScript()
1928 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1931 self.LogWarning("Error disabling the master IP address: %s",
1934 return master_params.name
1937 def _VerifyCertificate(filename):
1938 """Verifies a certificate for L{LUClusterVerifyConfig}.
1940 @type filename: string
1941 @param filename: Path to PEM file
1945 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1946 utils.ReadFile(filename))
1947 except Exception, err: # pylint: disable=W0703
1948 return (LUClusterVerifyConfig.ETYPE_ERROR,
1949 "Failed to load X509 certificate %s: %s" % (filename, err))
1952 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1953 constants.SSL_CERT_EXPIRATION_ERROR)
1956 fnamemsg = "While verifying %s: %s" % (filename, msg)
1961 return (None, fnamemsg)
1962 elif errcode == utils.CERT_WARNING:
1963 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1964 elif errcode == utils.CERT_ERROR:
1965 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1967 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1970 def _GetAllHypervisorParameters(cluster, instances):
1971 """Compute the set of all hypervisor parameters.
1973 @type cluster: L{objects.Cluster}
1974 @param cluster: the cluster object
1975 @param instances: list of L{objects.Instance}
1976 @param instances: additional instances from which to obtain parameters
1977 @rtype: list of (origin, hypervisor, parameters)
1978 @return: a list with all parameters found, indicating the hypervisor they
1979 apply to, and the origin (can be "cluster", "os X", or "instance Y")
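For example, the returned list could contain entries such as
("cluster", "xen-pvm", {...}), ("os debian-etch", "xen-pvm", {...}) and
("instance inst1", "kvm", {...}) (names purely illustrative).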
1984 for hv_name in cluster.enabled_hypervisors:
1985 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1987 for os_name, os_hvp in cluster.os_hvp.items():
1988 for hv_name, hv_params in os_hvp.items():
1990 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1991 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1993 # TODO: collapse identical parameter values into a single one
1994 for instance in instances:
1995 if instance.hvparams:
1996 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1997 cluster.FillHV(instance)))
2002 class _VerifyErrors(object):
2003 """Mix-in for cluster/group verify LUs.
2005 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2006 self.op and self._feedback_fn to be available.)
2010 ETYPE_FIELD = "code"
2011 ETYPE_ERROR = "ERROR"
2012 ETYPE_WARNING = "WARNING"
2014 def _Error(self, ecode, item, msg, *args, **kwargs):
2015 """Format an error message.
2017 Based on the opcode's error_codes parameter, either format a
2018 parseable error code, or a simpler error string.
2020 This must be called only from Exec and functions called from Exec.
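Illustratively (values are hypothetical), with error_codes enabled the
message follows the "type:code:itemtype:item:message" layout, e.g.
"ERROR:ENODENET:node:node1:missing bridges", while without it a shorter
human-readable string such as "ERROR: node node1: missing bridges" is
emitted.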
2023 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2024 itype, etxt, _ = ecode
2025 # If the error code is in the list of ignored errors, demote the error to a warning
2027 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2028 ltype = self.ETYPE_WARNING
2029 # first complete the msg
2032 # then format the whole message
2033 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2034 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2040 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2041 # and finally report it via the feedback_fn
2042 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2043 # do not mark the operation as failed for WARN cases only
2044 if ltype == self.ETYPE_ERROR:
2047 def _ErrorIf(self, cond, *args, **kwargs):
2048 """Log an error message if the passed condition is True.
2052 or self.op.debug_simulate_errors): # pylint: disable=E1101
2053 self._Error(*args, **kwargs)
2056 class LUClusterVerify(NoHooksLU):
2057 """Submits all jobs necessary to verify the cluster.
2062 def ExpandNames(self):
2063 self.needed_locks = {}
2065 def Exec(self, feedback_fn):
2068 if self.op.group_name:
2069 groups = [self.op.group_name]
2070 depends_fn = lambda: None
2072 groups = self.cfg.GetNodeGroupList()
2074 # Verify global configuration
2076 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2079 # Always depend on global verification
2080 depends_fn = lambda: [(-len(jobs), [])]
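# depends_fn is re-evaluated for every group job queued below; since
# len(jobs) grows as those jobs are appended, the relative dependency
# keeps pointing back at the first job of this submission, i.e. the
# config-verification job above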
2083 [opcodes.OpClusterVerifyGroup(group_name=group,
2084 ignore_errors=self.op.ignore_errors,
2085 depends=depends_fn())]
2086 for group in groups)
2088 # Fix up all parameters
2089 for op in itertools.chain(*jobs): # pylint: disable=W0142
2090 op.debug_simulate_errors = self.op.debug_simulate_errors
2091 op.verbose = self.op.verbose
2092 op.error_codes = self.op.error_codes
2094 op.skip_checks = self.op.skip_checks
2095 except AttributeError:
2096 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2098 return ResultWithJobs(jobs)
2101 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2102 """Verifies the cluster config.
2107 def _VerifyHVP(self, hvp_data):
2108 """Verifies locally the syntax of the hypervisor parameters.
2111 for item, hv_name, hv_params in hvp_data:
2112 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2115 hv_class = hypervisor.GetHypervisorClass(hv_name)
2116 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2117 hv_class.CheckParameterSyntax(hv_params)
2118 except errors.GenericError, err:
2119 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2121 def ExpandNames(self):
2122 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2123 self.share_locks = _ShareAll()
2125 def CheckPrereq(self):
2126 """Check prerequisites.
2129 # Retrieve all information
2130 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2131 self.all_node_info = self.cfg.GetAllNodesInfo()
2132 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2134 def Exec(self, feedback_fn):
2135 """Verify integrity of cluster, performing various test on nodes.
2139 self._feedback_fn = feedback_fn
2141 feedback_fn("* Verifying cluster config")
2143 for msg in self.cfg.VerifyConfig():
2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2146 feedback_fn("* Verifying cluster certificate files")
2148 for cert_filename in pathutils.ALL_CERT_FILES:
2149 (errcode, msg) = _VerifyCertificate(cert_filename)
2150 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2152 feedback_fn("* Verifying hypervisor parameters")
2154 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2155 self.all_inst_info.values()))
2157 feedback_fn("* Verifying all nodes belong to an existing group")
2159 # We do this verification here because, should this bogus circumstance
2160 # occur, it would never be caught by VerifyGroup, which only acts on
2161 # nodes/instances reachable from existing node groups.
2163 dangling_nodes = set(node.name for node in self.all_node_info.values()
2164 if node.group not in self.all_group_info)
2166 dangling_instances = {}
2167 no_node_instances = []
2169 for inst in self.all_inst_info.values():
2170 if inst.primary_node in dangling_nodes:
2171 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2172 elif inst.primary_node not in self.all_node_info:
2173 no_node_instances.append(inst.name)
2178 utils.CommaJoin(dangling_instances.get(node.name,
2180 for node in dangling_nodes]
2182 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2184 "the following nodes (and their instances) belong to a non"
2185 " existing group: %s", utils.CommaJoin(pretty_dangling))
2187 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2189 "the following instances have a non-existing primary-node:"
2190 " %s", utils.CommaJoin(no_node_instances))
2195 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2196 """Verifies the status of a node group.
2199 HPATH = "cluster-verify"
2200 HTYPE = constants.HTYPE_CLUSTER
2203 _HOOKS_INDENT_RE = re.compile("^", re.M)
2205 class NodeImage(object):
2206 """A class representing the logical and physical status of a node.
2209 @ivar name: the node name to which this object refers
2210 @ivar volumes: a structure as returned from
2211 L{ganeti.backend.GetVolumeList} (runtime)
2212 @ivar instances: a list of running instances (runtime)
2213 @ivar pinst: list of configured primary instances (config)
2214 @ivar sinst: list of configured secondary instances (config)
2215 @ivar sbp: dictionary of {primary-node: list of instances} for all
2216 instances for which this node is secondary (config)
2217 @ivar mfree: free memory, as reported by hypervisor (runtime)
2218 @ivar dfree: free disk, as reported by the node (runtime)
2219 @ivar offline: the offline status (config)
2220 @type rpc_fail: boolean
2221 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2222 not whether the individual keys were correct) (runtime)
2223 @type lvm_fail: boolean
2224 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2225 @type hyp_fail: boolean
2226 @ivar hyp_fail: whether the RPC call didn't return the instance list
2227 @type ghost: boolean
2228 @ivar ghost: whether this is a known node or not (config)
2229 @type os_fail: boolean
2230 @ivar os_fail: whether the RPC call didn't return valid OS data
2232 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2233 @type vm_capable: boolean
2234 @ivar vm_capable: whether the node can host instances
2236 @ivar pv_min: size in MiB of the smallest PVs
2238 @ivar pv_max: size in MiB of the biggest PVs
2241 def __init__(self, offline=False, name=None, vm_capable=True):
2250 self.offline = offline
2251 self.vm_capable = vm_capable
2252 self.rpc_fail = False
2253 self.lvm_fail = False
2254 self.hyp_fail = False
2256 self.os_fail = False
2261 def ExpandNames(self):
2262 # This raises errors.OpPrereqError on its own:
2263 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2265 # Get instances in node group; this is unsafe and needs verification later
2267 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2269 self.needed_locks = {
2270 locking.LEVEL_INSTANCE: inst_names,
2271 locking.LEVEL_NODEGROUP: [self.group_uuid],
2272 locking.LEVEL_NODE: [],
2274 # This opcode is run by watcher every five minutes and acquires all nodes
2275 # for a group. It doesn't run for a long time, so it's better to acquire
2276 # the node allocation lock as well.
2277 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2280 self.share_locks = _ShareAll()
2282 def DeclareLocks(self, level):
2283 if level == locking.LEVEL_NODE:
2284 # Get members of node group; this is unsafe and needs verification later
2285 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2287 all_inst_info = self.cfg.GetAllInstancesInfo()
2289 # In Exec(), we warn about mirrored instances that have primary and
2290 # secondary living in separate node groups. To fully verify that
2291 # volumes for these instances are healthy, we will need to do an
2292 extra call to their secondaries. We ensure here those nodes will be locked.
2294 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2295 # Important: access only the instances whose lock is owned
2296 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2297 nodes.update(all_inst_info[inst].secondary_nodes)
2299 self.needed_locks[locking.LEVEL_NODE] = nodes
2301 def CheckPrereq(self):
2302 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2303 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2305 group_nodes = set(self.group_info.members)
2307 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2310 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312 unlocked_instances = \
2313 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2316 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2317 utils.CommaJoin(unlocked_nodes),
2320 if unlocked_instances:
2321 raise errors.OpPrereqError("Missing lock for instances: %s" %
2322 utils.CommaJoin(unlocked_instances),
2325 self.all_node_info = self.cfg.GetAllNodesInfo()
2326 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2328 self.my_node_names = utils.NiceSort(group_nodes)
2329 self.my_inst_names = utils.NiceSort(group_instances)
2331 self.my_node_info = dict((name, self.all_node_info[name])
2332 for name in self.my_node_names)
2334 self.my_inst_info = dict((name, self.all_inst_info[name])
2335 for name in self.my_inst_names)
2337 # We detect here the nodes that will need the extra RPC calls for verifying
2338 # split LV volumes; they should be locked.
2339 extra_lv_nodes = set()
2341 for inst in self.my_inst_info.values():
2342 if inst.disk_template in constants.DTS_INT_MIRROR:
2343 for nname in inst.all_nodes:
2344 if self.all_node_info[nname].group != self.group_uuid:
2345 extra_lv_nodes.add(nname)
2347 unlocked_lv_nodes = \
2348 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2350 if unlocked_lv_nodes:
2351 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2352 utils.CommaJoin(unlocked_lv_nodes),
2354 self.extra_lv_nodes = list(extra_lv_nodes)
2356 def _VerifyNode(self, ninfo, nresult):
2357 """Perform some basic validation on data returned from a node.
2359 - check the result data structure is well formed and has all the
2361 - check ganeti version
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the results from the node
2367 @return: whether overall this call was successful (and we can expect
2368 reasonable values in the response)
2372 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374 # main result, nresult should be a non-empty dict
2375 test = not nresult or not isinstance(nresult, dict)
2376 _ErrorIf(test, constants.CV_ENODERPC, node,
2377 "unable to verify node: no data returned")
2381 # compares ganeti version
2382 local_version = constants.PROTOCOL_VERSION
2383 remote_version = nresult.get("version", None)
2384 test = not (remote_version and
2385 isinstance(remote_version, (list, tuple)) and
2386 len(remote_version) == 2)
2387 _ErrorIf(test, constants.CV_ENODERPC, node,
2388 "connection to node returned invalid data")
2392 test = local_version != remote_version[0]
2393 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2394 "incompatible protocol versions: master %s,"
2395 " node %s", local_version, remote_version[0])
2399 # node seems compatible, we can actually try to look into its results
2401 # full package version
2402 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2403 constants.CV_ENODEVERSION, node,
2404 "software version mismatch: master %s, node %s",
2405 constants.RELEASE_VERSION, remote_version[1],
2406 code=self.ETYPE_WARNING)
2408 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2409 if ninfo.vm_capable and isinstance(hyp_result, dict):
2410 for hv_name, hv_result in hyp_result.iteritems():
2411 test = hv_result is not None
2412 _ErrorIf(test, constants.CV_ENODEHV, node,
2413 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2415 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2416 if ninfo.vm_capable and isinstance(hvp_result, list):
2417 for item, hv_name, hv_result in hvp_result:
2418 _ErrorIf(True, constants.CV_ENODEHV, node,
2419 "hypervisor %s parameter verify failure (source %s): %s",
2420 hv_name, item, hv_result)
2422 test = nresult.get(constants.NV_NODESETUP,
2423 ["Missing NODESETUP results"])
2424 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2429 def _VerifyNodeTime(self, ninfo, nresult,
2430 nvinfo_starttime, nvinfo_endtime):
2431 """Check the node time.
2433 @type ninfo: L{objects.Node}
2434 @param ninfo: the node to check
2435 @param nresult: the remote results for the node
2436 @param nvinfo_starttime: the start time of the RPC call
2437 @param nvinfo_endtime: the end time of the RPC call
2441 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2443 ntime = nresult.get(constants.NV_TIME, None)
2445 ntime_merged = utils.MergeTime(ntime)
2446 except (ValueError, TypeError):
2447 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2450 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2451 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2452 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2453 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2457 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2458 "Node time diverges by at least %s from master node time",
2461 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2462 """Check the node LVM results and update info for cross-node checks.
2464 @type ninfo: L{objects.Node}
2465 @param ninfo: the node to check
2466 @param nresult: the remote results for the node
2467 @param vg_name: the configured VG name
2468 @type nimg: L{NodeImage}
2469 @param nimg: node image
2476 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2478 # checks vg existence and size > 20G
2479 vglist = nresult.get(constants.NV_VGLIST, None)
2481 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2483 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2484 constants.MIN_VG_SIZE)
2485 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2488 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2490 self._Error(constants.CV_ENODELVM, node, em)
2491 if pvminmax is not None:
2492 (nimg.pv_min, nimg.pv_max) = pvminmax
2494 def _VerifyGroupLVM(self, node_image, vg_name):
2495 """Check cross-node consistency in LVM.
2497 @type node_image: dict
2498 @param node_image: info about nodes, mapping from node to names to
2499 L{NodeImage} objects
2500 @param vg_name: the configured VG name
2506 # Only exclusive storage needs this kind of check
2507 if not self._exclusive_storage:
2510 # exclusive_storage wants all PVs to have the same size (approximately),
2511 # if the smallest and the biggest ones are okay, everything is fine.
2512 # pv_min is None iff pv_max is None
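# only the group-wide smallest and biggest PV sizes (and the nodes they
# live on) are compared; LvmExclusiveTestBadPvSizes decides whether the
# spread between them is acceptable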
2513 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2516 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2517 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2518 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2519 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2520 "PV sizes differ too much in the group; smallest (%s MB) is"
2521 " on %s, biggest (%s MB) is on %s",
2522 pvmin, minnode, pvmax, maxnode)
2524 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2525 """Check the node bridges.
2527 @type ninfo: L{objects.Node}
2528 @param ninfo: the node to check
2529 @param nresult: the remote results for the node
2530 @param bridges: the expected list of bridges
2537 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2539 missing = nresult.get(constants.NV_BRIDGES, None)
2540 test = not isinstance(missing, list)
2541 _ErrorIf(test, constants.CV_ENODENET, node,
2542 "did not return valid bridge information")
2544 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2545 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2547 def _VerifyNodeUserScripts(self, ninfo, nresult):
2548 """Check the results of user scripts presence and executability on the node
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2557 test = constants.NV_USERSCRIPTS not in nresult
2558 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2559 "did not return user scripts information")
2561 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2563 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2564 "user scripts not present or not executable: %s" %
2565 utils.CommaJoin(sorted(broken_scripts)))
2567 def _VerifyNodeNetwork(self, ninfo, nresult):
2568 """Check the node network connectivity results.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2576 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2578 test = constants.NV_NODELIST not in nresult
2579 _ErrorIf(test, constants.CV_ENODESSH, node,
2580 "node hasn't returned node ssh connectivity data")
2582 if nresult[constants.NV_NODELIST]:
2583 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2584 _ErrorIf(True, constants.CV_ENODESSH, node,
2585 "ssh communication with node '%s': %s", a_node, a_msg)
2587 test = constants.NV_NODENETTEST not in nresult
2588 _ErrorIf(test, constants.CV_ENODENET, node,
2589 "node hasn't returned node tcp connectivity data")
2591 if nresult[constants.NV_NODENETTEST]:
2592 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2594 _ErrorIf(True, constants.CV_ENODENET, node,
2595 "tcp communication with node '%s': %s",
2596 anode, nresult[constants.NV_NODENETTEST][anode])
2598 test = constants.NV_MASTERIP not in nresult
2599 _ErrorIf(test, constants.CV_ENODENET, node,
2600 "node hasn't returned node master IP reachability data")
2602 if not nresult[constants.NV_MASTERIP]:
2603 if node == self.master_node:
2604 msg = "the master node cannot reach the master IP (not configured?)"
2606 msg = "cannot reach the master IP"
2607 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2609 def _VerifyInstance(self, instance, inst_config, node_image,
2611 """Verify an instance.
2613 This function checks to see if the required block devices are
2614 available on the instance's node, and that the nodes are in the correct
2618 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 pnode = inst_config.primary_node
2620 pnode_img = node_image[pnode]
2621 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2623 node_vol_should = {}
2624 inst_config.MapLVsByNode(node_vol_should)
2626 cluster = self.cfg.GetClusterInfo()
2627 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2629 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2630 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2631 code=self.ETYPE_WARNING)
2633 for node in node_vol_should:
2634 n_img = node_image[node]
2635 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2636 # ignore missing volumes on offline or broken nodes
2638 for volume in node_vol_should[node]:
2639 test = volume not in n_img.volumes
2640 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2641 "volume %s missing on node %s", volume, node)
2643 if inst_config.admin_state == constants.ADMINST_UP:
2644 test = instance not in pnode_img.instances and not pnode_img.offline
2645 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2646 "instance not running on its primary node %s",
2648 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2649 "instance is marked as running and lives on offline node %s",
2652 diskdata = [(nname, success, status, idx)
2653 for (nname, disks) in diskstatus.items()
2654 for idx, (success, status) in enumerate(disks)]
2656 for nname, success, bdev_status, idx in diskdata:
2657 # the 'ghost node' construction in Exec() ensures that we have a
2659 snode = node_image[nname]
2660 bad_snode = snode.ghost or snode.offline
2661 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2662 not success and not bad_snode,
2663 constants.CV_EINSTANCEFAULTYDISK, instance,
2664 "couldn't retrieve status for disk/%s on %s: %s",
2665 idx, nname, bdev_status)
2666 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2667 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "disk/%s on %s is faulty", idx, nname)
2671 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2672 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2673 " primary node failed", instance)
2675 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2676 constants.CV_EINSTANCELAYOUT,
2677 instance, "instance has multiple secondary nodes: %s",
2678 utils.CommaJoin(inst_config.secondary_nodes),
2679 code=self.ETYPE_WARNING)
2681 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2682 # Disk template not compatible with exclusive_storage: no instance
2683 # node should have the flag set
2684 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2685 inst_config.all_nodes)
2686 es_nodes = [n for (n, es) in es_flags.items()
2688 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2689 "instance has template %s, which is not supported on nodes"
2690 " that have exclusive storage set: %s",
2691 inst_config.disk_template, utils.CommaJoin(es_nodes))
2693 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2694 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2695 instance_groups = {}
2697 for node in instance_nodes:
2698 instance_groups.setdefault(self.all_node_info[node].group,
2702 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2703 # Sort so that we always list the primary node first.
2704 for group, nodes in sorted(instance_groups.items(),
2705 key=lambda (_, nodes): pnode in nodes,
2708 self._ErrorIf(len(instance_groups) > 1,
2709 constants.CV_EINSTANCESPLITGROUPS,
2710 instance, "instance has primary and secondary nodes in"
2711 " different groups: %s", utils.CommaJoin(pretty_list),
2712 code=self.ETYPE_WARNING)
2714 inst_nodes_offline = []
2715 for snode in inst_config.secondary_nodes:
2716 s_img = node_image[snode]
2717 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2718 snode, "instance %s, connection to secondary node failed",
2722 inst_nodes_offline.append(snode)
2724 # warn that the instance lives on offline nodes
2725 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2726 "instance has offline secondary node(s) %s",
2727 utils.CommaJoin(inst_nodes_offline))
2728 # ... or ghost/non-vm_capable nodes
2729 for node in inst_config.all_nodes:
2730 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2731 instance, "instance lives on ghost node %s", node)
2732 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2733 instance, "instance lives on non-vm_capable node %s", node)
2735 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2736 """Verify if there are any unknown volumes in the cluster.
2738 The .os, .swap and backup volumes are ignored. All other volumes are
2739 reported as unknown.
2741 @type reserved: L{ganeti.utils.FieldSet}
2742 @param reserved: a FieldSet of reserved volume names
2745 for node, n_img in node_image.items():
2746 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2747 self.all_node_info[node].group != self.group_uuid):
2748 # skip non-healthy nodes
2750 for volume in n_img.volumes:
2751 test = ((node not in node_vol_should or
2752 volume not in node_vol_should[node]) and
2753 not reserved.Matches(volume))
2754 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2755 "volume %s is unknown", volume)
2757 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2758 """Verify N+1 Memory Resilience.
2760 Check that if one single node dies we can still start all the
2761 instances it was primary for.
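Informally: for every node acting as a secondary, sum the minimum memory of
the auto-balanced instances that could fail over to it from a given primary,
and warn if that sum exceeds the node's currently free memory.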
2764 cluster_info = self.cfg.GetClusterInfo()
2765 for node, n_img in node_image.items():
2766 # This code checks that every node which is now listed as
2767 # secondary has enough memory to host all instances it is
2768 # supposed to should a single other node in the cluster fail.
2769 # FIXME: not ready for failover to an arbitrary node
2770 # FIXME: does not support file-backed instances
2771 # WARNING: we currently take into account down instances as well
2772 # as up ones, considering that even if they're down someone
2773 # might want to start them even in the event of a node failure.
2774 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2775 # we're skipping nodes marked offline and nodes in other groups from
2776 # the N+1 warning, since most likely we don't have good memory
2777 # information from them; we already list instances living on such
2778 # nodes, and that's enough warning
2780 #TODO(dynmem): also consider ballooning out other instances
2781 for prinode, instances in n_img.sbp.items():
2783 for instance in instances:
2784 bep = cluster_info.FillBE(instance_cfg[instance])
2785 if bep[constants.BE_AUTO_BALANCE]:
2786 needed_mem += bep[constants.BE_MINMEM]
2787 test = n_img.mfree < needed_mem
2788 self._ErrorIf(test, constants.CV_ENODEN1, node,
2789 "not enough memory to accomodate instance failovers"
2790 " should node %s fail (%dMiB needed, %dMiB available)",
2791 prinode, needed_mem, n_img.mfree)
2794 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2795 (files_all, files_opt, files_mc, files_vm)):
2796 """Verifies file checksums collected from all nodes.
2798 @param errorif: Callback for reporting errors
2799 @param nodeinfo: List of L{objects.Node} objects
2800 @param master_node: Name of master node
2801 @param all_nvinfo: RPC results
2804 # Define functions determining which nodes to consider for a file
2807 (files_mc, lambda node: (node.master_candidate or
2808 node.name == master_node)),
2809 (files_vm, lambda node: node.vm_capable),
2812 # Build mapping from filename to list of nodes which should have the file
2814 for (files, fn) in files2nodefn:
2816 filenodes = nodeinfo
2818 filenodes = filter(fn, nodeinfo)
2819 nodefiles.update((filename,
2820 frozenset(map(operator.attrgetter("name"), filenodes)))
2821 for filename in files)
2823 assert set(nodefiles) == (files_all | files_mc | files_vm)
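# fileinfo maps each filename to a dict of {checksum: set of node names
# reporting that checksum}, filled in from the per-node RPC payloads below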
2825 fileinfo = dict((filename, {}) for filename in nodefiles)
2826 ignore_nodes = set()
2828 for node in nodeinfo:
2830 ignore_nodes.add(node.name)
2833 nresult = all_nvinfo[node.name]
2835 if nresult.fail_msg or not nresult.payload:
2838 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2839 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2840 for (key, value) in fingerprints.items())
2843 test = not (node_files and isinstance(node_files, dict))
2844 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2845 "Node did not return file checksum data")
2847 ignore_nodes.add(node.name)
2850 # Build per-checksum mapping from filename to nodes having it
2851 for (filename, checksum) in node_files.items():
2852 assert filename in nodefiles
2853 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2855 for (filename, checksums) in fileinfo.items():
2856 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2858 # Nodes having the file
2859 with_file = frozenset(node_name
2860 for nodes in fileinfo[filename].values()
2861 for node_name in nodes) - ignore_nodes
2863 expected_nodes = nodefiles[filename] - ignore_nodes
2865 # Nodes missing file
2866 missing_file = expected_nodes - with_file
2868 if filename in files_opt:
2870 errorif(missing_file and missing_file != expected_nodes,
2871 constants.CV_ECLUSTERFILECHECK, None,
2872 "File %s is optional, but it must exist on all or no"
2873 " nodes (not found on %s)",
2874 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2876 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is missing from node(s) %s", filename,
2878 utils.CommaJoin(utils.NiceSort(missing_file)))
2880 # Warn if a node has a file it shouldn't
2881 unexpected = with_file - expected_nodes
2883 constants.CV_ECLUSTERFILECHECK, None,
2884 "File %s should not exist on node(s) %s",
2885 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2887 # See if there are multiple versions of the file
2888 test = len(checksums) > 1
2890 variants = ["variant %s on %s" %
2891 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2892 for (idx, (checksum, nodes)) in
2893 enumerate(sorted(checksums.items()))]
2897 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2898 "File %s found with %s different checksums (%s)",
2899 filename, len(checksums), "; ".join(variants))
2901 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2903 """Verifies and the node DRBD status.
2905 @type ninfo: L{objects.Node}
2906 @param ninfo: the node to check
2907 @param nresult: the remote results for the node
2908 @param instanceinfo: the dict of instances
2909 @param drbd_helper: the configured DRBD usermode helper
2910 @param drbd_map: the DRBD map as returned by
2911 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2915 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2918 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2919 test = (helper_result is None)
2920 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2921 "no drbd usermode helper returned")
2923 status, payload = helper_result
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "drbd usermode helper check unsuccessful: %s", payload)
2927 test = status and (payload != drbd_helper)
2928 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2929 "wrong drbd usermode helper: %s", payload)
2931 # compute the DRBD minors
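# node_drbd maps each minor number to (instance name, whether that instance
# is expected to be running according to the configuration)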
2933 for minor, instance in drbd_map[node].items():
2934 test = instance not in instanceinfo
2935 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2936 "ghost instance '%s' in temporary DRBD map", instance)
2937 # ghost instance should not be running, but otherwise we
2938 # don't give double warnings (both ghost instance and
2939 # unallocated minor in use)
2941 node_drbd[minor] = (instance, False)
2943 instance = instanceinfo[instance]
2944 node_drbd[minor] = (instance.name,
2945 instance.admin_state == constants.ADMINST_UP)
2947 # and now check them
2948 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2949 test = not isinstance(used_minors, (tuple, list))
2950 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2951 "cannot parse drbd status file: %s", str(used_minors))
2953 # we cannot check drbd status
2956 for minor, (iname, must_exist) in node_drbd.items():
2957 test = minor not in used_minors and must_exist
2958 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2959 "drbd minor %d of instance %s is not active", minor, iname)
2960 for minor in used_minors:
2961 test = minor not in node_drbd
2962 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2963 "unallocated drbd minor %d is in use", minor)
2965 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2966 """Builds the node OS structures.
2968 @type ninfo: L{objects.Node}
2969 @param ninfo: the node to check
2970 @param nresult: the remote results for the node
2971 @param nimg: the node image object
2975 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2977 remote_os = nresult.get(constants.NV_OSLIST, None)
2978 test = (not isinstance(remote_os, list) or
2979 not compat.all(isinstance(v, list) and len(v) == 7
2980 for v in remote_os))
2982 _ErrorIf(test, constants.CV_ENODEOS, node,
2983 "node hasn't returned valid OS data")
2992 for (name, os_path, status, diagnose,
2993 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2995 if name not in os_dict:
2998 # parameters is a list of lists instead of list of tuples due to
2999 # JSON lacking a real tuple type, fix it:
3000 parameters = [tuple(v) for v in parameters]
3001 os_dict[name].append((os_path, status, diagnose,
3002 set(variants), set(parameters), set(api_ver)))
3004 nimg.oslist = os_dict
3006 def _VerifyNodeOS(self, ninfo, nimg, base):
3007 """Verifies the node OS list.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nimg: the node image object
3012 @param base: the 'template' node we match against (e.g. from the master)
3016 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3018 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3020 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3021 for os_name, os_data in nimg.oslist.items():
3022 assert os_data, "Empty OS status for OS %s?!" % os_name
3023 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3024 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3025 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3026 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3027 "OS '%s' has multiple entries (first one shadows the rest): %s",
3028 os_name, utils.CommaJoin([v[0] for v in os_data]))
3029 # comparisons with the 'base' image
3030 test = os_name not in base.oslist
3031 _ErrorIf(test, constants.CV_ENODEOS, node,
3032 "Extra OS %s not present on reference node (%s)",
3036 assert base.oslist[os_name], "Base node has empty OS status?"
3037 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3039 # base OS is invalid, skipping
3041 for kind, a, b in [("API version", f_api, b_api),
3042 ("variants list", f_var, b_var),
3043 ("parameters", beautify_params(f_param),
3044 beautify_params(b_param))]:
3045 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3046 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3047 kind, os_name, base.name,
3048 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3050 # check any missing OSes
3051 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3052 _ErrorIf(missing, constants.CV_ENODEOS, node,
3053 "OSes present on reference node %s but missing on this node: %s",
3054 base.name, utils.CommaJoin(missing))
3056 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3057 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3059 @type ninfo: L{objects.Node}
3060 @param ninfo: the node to check
3061 @param nresult: the remote results for the node
3062 @type is_master: bool
3063 @param is_master: Whether node is the master node
3069 (constants.ENABLE_FILE_STORAGE or
3070 constants.ENABLE_SHARED_FILE_STORAGE)):
3072 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3074 # This should never happen
3075 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3076 "Node did not return forbidden file storage paths")
3078 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3079 "Found forbidden file storage paths: %s",
3080 utils.CommaJoin(fspaths))
3082 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3083 constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Node should not have returned forbidden file storage"
3087 def _VerifyOob(self, ninfo, nresult):
3088 """Verifies out of band functionality of a node.
3090 @type ninfo: L{objects.Node}
3091 @param ninfo: the node to check
3092 @param nresult: the remote results for the node
3096 # We just have to verify the paths on master and/or master candidates
3097 # as the oob helper is invoked on the master
3098 if ((ninfo.master_candidate or ninfo.master_capable) and
3099 constants.NV_OOB_PATHS in nresult):
3100 for path_result in nresult[constants.NV_OOB_PATHS]:
3101 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3103 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3104 """Verifies and updates the node volume data.
3106 This function will update a L{NodeImage}'s internal structures
3107 with data from the remote call.
3109 @type ninfo: L{objects.Node}
3110 @param ninfo: the node to check
3111 @param nresult: the remote results for the node
3112 @param nimg: the node image object
3113 @param vg_name: the configured VG name
3117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3119 nimg.lvm_fail = True
3120 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3123 elif isinstance(lvdata, basestring):
3124 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3125 utils.SafeEncode(lvdata))
3126 elif not isinstance(lvdata, dict):
3127 _ErrorIf(True, constants.CV_ENODELVM, node,
3128 "rpc call to node failed (lvlist)")
3130 nimg.volumes = lvdata
3131 nimg.lvm_fail = False
3133 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3134 """Verifies and updates the node instance list.
3136 If the listing was successful, then updates this node's instance
3137 list. Otherwise, it marks the RPC call as failed for the instance
3140 @type ninfo: L{objects.Node}
3141 @param ninfo: the node to check
3142 @param nresult: the remote results for the node
3143 @param nimg: the node image object
3146 idata = nresult.get(constants.NV_INSTANCELIST, None)
3147 test = not isinstance(idata, list)
3148 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3149 "rpc call to node failed (instancelist): %s",
3150 utils.SafeEncode(str(idata)))
3152 nimg.hyp_fail = True
3154 nimg.instances = idata
3156 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3157 """Verifies and computes a node information map
3159 @type ninfo: L{objects.Node}
3160 @param ninfo: the node to check
3161 @param nresult: the remote results for the node
3162 @param nimg: the node image object
3163 @param vg_name: the configured VG name
3167 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3169 # try to read free memory (from the hypervisor)
3170 hv_info = nresult.get(constants.NV_HVINFO, None)
3171 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3172 _ErrorIf(test, constants.CV_ENODEHV, node,
3173 "rpc call to node failed (hvinfo)")
3176 nimg.mfree = int(hv_info["memory_free"])
3177 except (ValueError, TypeError):
3178 _ErrorIf(True, constants.CV_ENODERPC, node,
3179 "node returned invalid nodeinfo, check hypervisor")
3181 # FIXME: devise a free space model for file based instances as well
3182 if vg_name is not None:
3183 test = (constants.NV_VGLIST not in nresult or
3184 vg_name not in nresult[constants.NV_VGLIST])
3185 _ErrorIf(test, constants.CV_ENODELVM, node,
3186 "node didn't return data for the volume group '%s'"
3187 " - it is either missing or broken", vg_name)
3190 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3191 except (ValueError, TypeError):
3192 _ErrorIf(True, constants.CV_ENODERPC, node,
3193 "node returned invalid LVM info, check LVM status")
3195 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3196 """Gets per-disk status information for all instances.
3198 @type nodelist: list of strings
3199 @param nodelist: Node names
3200 @type node_image: dict of (name, L{objects.Node})
3201 @param node_image: Node objects
3202 @type instanceinfo: dict of (name, L{objects.Instance})
3203 @param instanceinfo: Instance objects
3204 @rtype: {instance: {node: [(success, payload)]}}
3205 @return: a dictionary of per-instance dictionaries with nodes as
3206 keys and disk information as values; the disk information is a
3207 list of tuples (success, payload)
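A (hypothetical) result could look like
{"inst1": {"node1": [(True, status0), (False, "disk degraded")]}}.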
3210 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3213 node_disks_devonly = {}
3214 diskless_instances = set()
3215 diskless = constants.DT_DISKLESS
3217 for nname in nodelist:
3218 node_instances = list(itertools.chain(node_image[nname].pinst,
3219 node_image[nname].sinst))
3220 diskless_instances.update(inst for inst in node_instances
3221 if instanceinfo[inst].disk_template == diskless)
3222 disks = [(inst, disk)
3223 for inst in node_instances
3224 for disk in instanceinfo[inst].disks]
3227 # No need to collect data
3230 node_disks[nname] = disks
3232 # _AnnotateDiskParams already makes copies of the disks
3234 for (inst, dev) in disks:
3235 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3236 self.cfg.SetDiskID(anno_disk, nname)
3237 devonly.append(anno_disk)
3239 node_disks_devonly[nname] = devonly
3241 assert len(node_disks) == len(node_disks_devonly)
3243 # Collect data from all nodes with disks
3244 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3247 assert len(result) == len(node_disks)
3251 for (nname, nres) in result.items():
3252 disks = node_disks[nname]
3255 # No data from this node
3256 data = len(disks) * [(False, "node offline")]
3259 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3260 "while getting disk information: %s", msg)
3262 # No data from this node
3263 data = len(disks) * [(False, msg)]
3266 for idx, i in enumerate(nres.payload):
3267 if isinstance(i, (tuple, list)) and len(i) == 2:
3270 logging.warning("Invalid result from node %s, entry %d: %s",
3272 data.append((False, "Invalid result from the remote node"))
3274 for ((inst, _), status) in zip(disks, data):
3275 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3277 # Add empty entries for diskless instances.
3278 for inst in diskless_instances:
3279 assert inst not in instdisk
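# Sanity check: every instance must have one (success, payload) pair per
# configured disk, and no more nodes recorded than it actually uses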
3282 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3283 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3284 compat.all(isinstance(s, (tuple, list)) and
3285 len(s) == 2 for s in statuses)
3286 for inst, nnames in instdisk.items()
3287 for nname, statuses in nnames.items())
3289 instdisk_keys = set(instdisk)
3290 instanceinfo_keys = set(instanceinfo)
3291 assert instdisk_keys == instanceinfo_keys, \
3292 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3293 (instdisk_keys, instanceinfo_keys))
3298 def _SshNodeSelector(group_uuid, all_nodes):
3299 """Create endless iterators for all potential SSH check hosts.
3302 nodes = [node for node in all_nodes
3303 if (node.group != group_uuid and
3305 keyfunc = operator.attrgetter("group")
3307 return map(itertools.cycle,
3308 [sorted(map(operator.attrgetter("name"), names))
3309 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3313 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3314 """Choose which nodes should talk to which other nodes.
3316 We will make nodes contact all nodes in their group, and one node from each of the other groups.
3319 @warning: This algorithm has a known issue if one node group is much
3320 smaller than others (e.g. just one node). In such a case all other
3321 nodes will talk to the single node.
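As an illustrative sketch (hypothetical names): when verifying a group
containing n1 and n2 while another group contains m1, both n1 and n2 end up
with SSH check targets that include their group peers plus one node (m1)
picked from the other group by the cycling selectors above.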
3324 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3325 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3327 return (online_nodes,
3328 dict((name, sorted([i.next() for i in sel]))
3329 for name in online_nodes))
3331 def BuildHooksEnv(self):
3334 Cluster-Verify hooks just ran in the post phase and their failure makes
3335 the output be logged in the verify output and the verification to fail.
3339 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3342 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3343 for node in self.my_node_info.values())
3347 def BuildHooksNodes(self):
3348 """Build hooks nodes.
3351 return ([], self.my_node_names)
3353 def Exec(self, feedback_fn):
3354 """Verify integrity of the node group, performing various test on nodes.
3357 # This method has too many local variables. pylint: disable=R0914
3358 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3360 if not self.my_node_names:
3362 feedback_fn("* Empty node group, skipping verification")
3366 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3367 verbose = self.op.verbose
3368 self._feedback_fn = feedback_fn
3370 vg_name = self.cfg.GetVGName()
3371 drbd_helper = self.cfg.GetDRBDHelper()
3372 cluster = self.cfg.GetClusterInfo()
3373 hypervisors = cluster.enabled_hypervisors
3374 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3376 i_non_redundant = [] # Non redundant instances
3377 i_non_a_balanced = [] # Non auto-balanced instances
3378 i_offline = 0 # Count of offline instances
3379 n_offline = 0 # Count of offline nodes
3380 n_drained = 0 # Count of nodes being drained
3381 node_vol_should = {}
3383 # FIXME: verify OS list
3386 filemap = _ComputeAncillaryFiles(cluster, False)
3388 # do local checksums
3389 master_node = self.master_node = self.cfg.GetMasterNode()
3390 master_ip = self.cfg.GetMasterIP()
3392 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3395 if self.cfg.GetUseExternalMipScript():
3396 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
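# Each NV_* key below selects one check for the node_verify RPC to run on
# the target nodes; the values carry the per-check input (file list,
# hypervisors, node lists, etc.)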
3398 node_verify_param = {
3399 constants.NV_FILELIST:
3400 map(vcluster.MakeVirtualPath,
3401 utils.UniqueSequence(filename
3402 for files in filemap
3403 for filename in files)),
3404 constants.NV_NODELIST:
3405 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3406 self.all_node_info.values()),
3407 constants.NV_HYPERVISOR: hypervisors,
3408 constants.NV_HVPARAMS:
3409 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3410 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3411 for node in node_data_list
3412 if not node.offline],
3413 constants.NV_INSTANCELIST: hypervisors,
3414 constants.NV_VERSION: None,
3415 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3416 constants.NV_NODESETUP: None,
3417 constants.NV_TIME: None,
3418 constants.NV_MASTERIP: (master_node, master_ip),
3419 constants.NV_OSLIST: None,
3420 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3421 constants.NV_USERSCRIPTS: user_scripts,
3424 if vg_name is not None:
3425 node_verify_param[constants.NV_VGLIST] = None
3426 node_verify_param[constants.NV_LVLIST] = vg_name
3427 node_verify_param[constants.NV_PVLIST] = [vg_name]
3430 node_verify_param[constants.NV_DRBDLIST] = None
3431 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3433 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3434 # Load file storage paths only from master node
3435 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3438 # FIXME: this needs to be changed per node-group, not cluster-wide
3440 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3441 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3442 bridges.add(default_nicpp[constants.NIC_LINK])
3443 for instance in self.my_inst_info.values():
3444 for nic in instance.nics:
3445 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3446 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447 bridges.add(full_nic[constants.NIC_LINK])
3450 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3452 # Build our expected cluster state
3453 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3455 vm_capable=node.vm_capable))
3456 for node in node_data_list)
3460 for node in self.all_node_info.values():
3461 path = _SupportsOob(self.cfg, node)
3462 if path and path not in oob_paths:
3463 oob_paths.append(path)
3466 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3468 for instance in self.my_inst_names:
3469 inst_config = self.my_inst_info[instance]
3470 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3473 for nname in inst_config.all_nodes:
3474 if nname not in node_image:
3475 gnode = self.NodeImage(name=nname)
3476 gnode.ghost = (nname not in self.all_node_info)
3477 node_image[nname] = gnode
3479 inst_config.MapLVsByNode(node_vol_should)
3481 pnode = inst_config.primary_node
3482 node_image[pnode].pinst.append(instance)
3484 for snode in inst_config.secondary_nodes:
3485 nimg = node_image[snode]
3486 nimg.sinst.append(instance)
3487 if pnode not in nimg.sbp:
3488 nimg.sbp[pnode] = []
3489 nimg.sbp[pnode].append(instance)
3491 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3492 # The value of exclusive_storage should be the same across the group, so if
3493 # it's True for at least one node, we act as if it were set for all the nodes
3494 self._exclusive_storage = compat.any(es_flags.values())
3495 if self._exclusive_storage:
3496 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3498 # At this point, we have the in-memory data structures complete,
3499 # except for the runtime information, which we'll gather next
3501 # Due to the way our RPC system works, exact response times cannot be
3502 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3503 # time before and after executing the request, we can at least have a time window.
3505 nvinfo_starttime = time.time()
3506 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3508 self.cfg.GetClusterName())
3509 nvinfo_endtime = time.time()
3511 if self.extra_lv_nodes and vg_name is not None:
3513 self.rpc.call_node_verify(self.extra_lv_nodes,
3514 {constants.NV_LVLIST: vg_name},
3515 self.cfg.GetClusterName())
3517 extra_lv_nvinfo = {}
3519 all_drbd_map = self.cfg.ComputeDRBDMap()
3521 feedback_fn("* Gathering disk information (%s nodes)" %
3522 len(self.my_node_names))
3523 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3526 feedback_fn("* Verifying configuration file consistency")
3528 # If not all nodes are being checked, we need to make sure the master node
3529 # and a non-checked vm_capable node are in the list.
3530 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3532 vf_nvinfo = all_nvinfo.copy()
3533 vf_node_info = list(self.my_node_info.values())
3534 additional_nodes = []
3535 if master_node not in self.my_node_info:
3536 additional_nodes.append(master_node)
3537 vf_node_info.append(self.all_node_info[master_node])
3538 # Add the first vm_capable node we find which is not included,
3539 # excluding the master node (which we already have)
3540 for node in absent_nodes:
3541 nodeinfo = self.all_node_info[node]
3542 if (nodeinfo.vm_capable and not nodeinfo.offline and
3543 node != master_node):
3544 additional_nodes.append(node)
3545 vf_node_info.append(self.all_node_info[node])
3547 key = constants.NV_FILELIST
3548 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3549 {key: node_verify_param[key]},
3550 self.cfg.GetClusterName()))
3552 vf_nvinfo = all_nvinfo
3553 vf_node_info = self.my_node_info.values()
3555 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3557 feedback_fn("* Verifying node status")
3561 for node_i in node_data_list:
3563 nimg = node_image[node]
3567 feedback_fn("* Skipping offline node %s" % (node,))
3571 if node == master_node:
3573 elif node_i.master_candidate:
3574 ntype = "master candidate"
3575 elif node_i.drained:
3581 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3583 msg = all_nvinfo[node].fail_msg
3584 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3587 nimg.rpc_fail = True
3590 nresult = all_nvinfo[node].payload
3592 nimg.call_ok = self._VerifyNode(node_i, nresult)
3593 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3594 self._VerifyNodeNetwork(node_i, nresult)
3595 self._VerifyNodeUserScripts(node_i, nresult)
3596 self._VerifyOob(node_i, nresult)
3597 self._VerifyFileStoragePaths(node_i, nresult,
3598 node == master_node)
3601 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3602 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3605 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3606 self._UpdateNodeInstances(node_i, nresult, nimg)
3607 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3608 self._UpdateNodeOS(node_i, nresult, nimg)
3610 if not nimg.os_fail:
3611 if refos_img is None:
3613 self._VerifyNodeOS(node_i, nimg, refos_img)
3614 self._VerifyNodeBridges(node_i, nresult, bridges)
3616 # Check whether all running instances are primary for the node. (This
3617 # can no longer be done from _VerifyInstance below, since some of the
3618 # wrong instances could be from other node groups.)
3619 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3621 for inst in non_primary_inst:
3622 test = inst in self.all_inst_info
3623 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3624 "instance should not run on node %s", node_i.name)
3625 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3626 "node is running unknown instance %s", inst)
3628 self._VerifyGroupLVM(node_image, vg_name)
3630 for node, result in extra_lv_nvinfo.items():
3631 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3632 node_image[node], vg_name)
3634 feedback_fn("* Verifying instance status")
3635 for instance in self.my_inst_names:
3637 feedback_fn("* Verifying instance %s" % instance)
3638 inst_config = self.my_inst_info[instance]
3639 self._VerifyInstance(instance, inst_config, node_image,
3642 # If the instance is non-redundant we cannot survive losing its primary
3643 # node, so we are not N+1 compliant.
3644 if inst_config.disk_template not in constants.DTS_MIRRORED:
3645 i_non_redundant.append(instance)
3647 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3648 i_non_a_balanced.append(instance)
3650 feedback_fn("* Verifying orphan volumes")
3651 reserved = utils.FieldSet(*cluster.reserved_lvs)
3653 # We will get spurious "unknown volume" warnings if any node of this group
3654 # is secondary for an instance whose primary is in another group. To avoid
3655 # them, we find these instances and add their volumes to node_vol_should.
3656 for inst in self.all_inst_info.values():
3657 for secondary in inst.secondary_nodes:
3658 if (secondary in self.my_node_info
3659 and inst.name not in self.my_inst_info):
3660 inst.MapLVsByNode(node_vol_should)
3663 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3665 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3666 feedback_fn("* Verifying N+1 Memory redundancy")
3667 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3669 feedback_fn("* Other Notes")
3671 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3672 % len(i_non_redundant))
3674 if i_non_a_balanced:
3675 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3676 % len(i_non_a_balanced))
3679 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3682 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3685 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3689 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3690 """Analyze the post-hooks' result
3692 This method analyses the hook result, handles it, and sends some
3693 nicely-formatted feedback back to the user.
3695 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3696 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3697 @param hooks_results: the results of the multi-node hooks rpc call
3698 @param feedback_fn: function used to send feedback back to the caller
3699 @param lu_result: previous Exec result
3700 @return: the new Exec result, based on the previous result
3704 # We only really run POST phase hooks, only for non-empty groups,
3705 # and are only interested in their results
3706 if not self.my_node_names:
3709 elif phase == constants.HOOKS_PHASE_POST:
3710 # Used to change hooks' output to proper indentation
3711 feedback_fn("* Hooks Results")
3712 assert hooks_results, "invalid result from hooks"
3714 for node_name in hooks_results:
3715 res = hooks_results[node_name]
3717 test = msg and not res.offline
3718 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3719 "Communication failure in hooks execution: %s", msg)
3720 if res.offline or msg:
3721 # No need to investigate payload if node is offline or gave
3724 for script, hkr, output in res.payload:
3725 test = hkr == constants.HKR_FAIL
3726 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3727 "Script %s failed, output:", script)
3729 output = self._HOOKS_INDENT_RE.sub(" ", output)
3730 feedback_fn("%s" % output)
3736 class LUClusterVerifyDisks(NoHooksLU):
3737 """Verifies the cluster disks status.
3742 def ExpandNames(self):
3743 self.share_locks = _ShareAll()
3744 self.needed_locks = {
3745 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3748 def Exec(self, feedback_fn):
3749 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3751 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3752 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3753 for group in group_names])
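# For illustration, with two hypothetical node groups named "default" and
# "storage", the value built above would be roughly:
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])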
3756 class LUGroupVerifyDisks(NoHooksLU):
3757 """Verifies the status of all disks in a node group.
3762 def ExpandNames(self):
3763 # Raises errors.OpPrereqError on its own if group can't be found
3764 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3766 self.share_locks = _ShareAll()
3767 self.needed_locks = {
3768 locking.LEVEL_INSTANCE: [],
3769 locking.LEVEL_NODEGROUP: [],
3770 locking.LEVEL_NODE: [],
3772 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3773 # starts one instance of this opcode for every group, which means all
3774 # nodes will be locked for a short amount of time, so it's better to
3775 # acquire the node allocation lock as well.
3776 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3779 def DeclareLocks(self, level):
3780 if level == locking.LEVEL_INSTANCE:
3781 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3783 # Lock instances optimistically, needs verification once node and group
3784 # locks have been acquired
3785 self.needed_locks[locking.LEVEL_INSTANCE] = \
3786 self.cfg.GetNodeGroupInstances(self.group_uuid)
3788 elif level == locking.LEVEL_NODEGROUP:
3789 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3791 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3792 set([self.group_uuid] +
3793 # Lock all groups used by instances optimistically; this requires
3794 # going via the node before it's locked, requiring verification
3797 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3798 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3800 elif level == locking.LEVEL_NODE:
3801 # This will only lock the nodes in the group to be verified which contain actual instances
3803 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3804 self._LockInstancesNodes()
3806 # Lock all nodes in group to be verified
3807 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3808 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3809 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3811 def CheckPrereq(self):
3812 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3813 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3814 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3816 assert self.group_uuid in owned_groups
3818 # Check if locked instances are still correct
3819 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3821 # Get instance information
3822 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3824 # Check if node groups for locked instances are still correct
3825 _CheckInstancesNodeGroups(self.cfg, self.instances,
3826 owned_groups, owned_nodes, self.group_uuid)
3828 def Exec(self, feedback_fn):
3829 """Verify integrity of cluster disks.
3831 @rtype: tuple of three items
3832 @return: a tuple of (dict of node-to-node_error, list of instances
3833 which need activate-disks, dict of instance: (node, volume) for
3838 res_instances = set()
3841 nv_dict = _MapInstanceDisksToNodes(
3842 [inst for inst in self.instances.values()
3843 if inst.admin_state == constants.ADMINST_UP])
3846 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3847 set(self.cfg.GetVmCapableNodeList()))
3849 node_lvs = self.rpc.call_lv_list(nodes, [])
3851 for (node, node_res) in node_lvs.items():
3852 if node_res.offline:
3855 msg = node_res.fail_msg
3857 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3858 res_nodes[node] = msg
3861 for lv_name, (_, _, lv_online) in node_res.payload.items():
3862 inst = nv_dict.pop((node, lv_name), None)
3863 if not (lv_online or inst is None):
3864 res_instances.add(inst)
3866 # any leftover items in nv_dict are missing LVs, let's arrange the data
3868 for key, inst in nv_dict.iteritems():
3869 res_missing.setdefault(inst, []).append(list(key))
3871 return (res_nodes, list(res_instances), res_missing)
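# Sketch of the result shape (hypothetical names): a possible return value is
#   ({"node2.example.com": "Error enumerating LVs: ..."},       # node errors
#    ["instance3.example.com"],                                 # need activate-disks
#    {"instance4.example.com": [["node1.example.com", "xenvg/lv0"]]})  # missing LVs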
3874 class LUClusterRepairDiskSizes(NoHooksLU):
3875 """Verifies the cluster disks sizes.
3880 def ExpandNames(self):
3881 if self.op.instances:
3882 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3883 # Not getting the node allocation lock as only a specific set of
3884 # instances (and their nodes) is going to be acquired
3885 self.needed_locks = {
3886 locking.LEVEL_NODE_RES: [],
3887 locking.LEVEL_INSTANCE: self.wanted_names,
3889 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3891 self.wanted_names = None
3892 self.needed_locks = {
3893 locking.LEVEL_NODE_RES: locking.ALL_SET,
3894 locking.LEVEL_INSTANCE: locking.ALL_SET,
3896 # This opcode acquires the node locks for all instances
3897 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3900 self.share_locks = {
3901 locking.LEVEL_NODE_RES: 1,
3902 locking.LEVEL_INSTANCE: 0,
3903 locking.LEVEL_NODE_ALLOC: 1,
3906 def DeclareLocks(self, level):
3907 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3908 self._LockInstancesNodes(primary_only=True, level=level)
3910 def CheckPrereq(self):
3911 """Check prerequisites.
3913 This only checks the optional instance list against the existing names.
3916 if self.wanted_names is None:
3917 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3919 self.wanted_instances = \
3920 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3922 def _EnsureChildSizes(self, disk):
3923 """Ensure children of the disk have the needed disk size.
3925 This is valid mainly for DRBD8 and fixes an issue where the
3926 children have a smaller disk size than their parent.
3928 @param disk: an L{ganeti.objects.Disk} object
3931 if disk.dev_type == constants.LD_DRBD8:
3932 assert disk.children, "Empty children for DRBD8?"
3933 fchild = disk.children[0]
3934 mismatch = fchild.size < disk.size
3936 self.LogInfo("Child disk has size %d, parent %d, fixing",
3937 fchild.size, disk.size)
3938 fchild.size = disk.size
3940 # and we recurse on this child only, not on the metadev
3941 return self._EnsureChildSizes(fchild) or mismatch
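# For example (hypothetical sizes): a DRBD8 disk recorded at 10240 MiB whose
# data child reports only 10112 MiB gets the child grown to 10240 MiB here,
# and True is returned so the caller knows the configuration must be updated.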
3945 def Exec(self, feedback_fn):
3946 """Verify the size of cluster disks.
3949 # TODO: check child disks too
3950 # TODO: check differences in size between primary/secondary nodes
3952 for instance in self.wanted_instances:
3953 pnode = instance.primary_node
3954 if pnode not in per_node_disks:
3955 per_node_disks[pnode] = []
3956 for idx, disk in enumerate(instance.disks):
3957 per_node_disks[pnode].append((instance, idx, disk))
3959 assert not (frozenset(per_node_disks.keys()) -
3960 self.owned_locks(locking.LEVEL_NODE_RES)), \
3961 "Not owning correct locks"
3962 assert not self.owned_locks(locking.LEVEL_NODE)
3965 for node, dskl in per_node_disks.items():
3966 newl = [v[2].Copy() for v in dskl]
3968 self.cfg.SetDiskID(dsk, node)
3969 result = self.rpc.call_blockdev_getsize(node, newl)
3971 self.LogWarning("Failure in blockdev_getsize call to node"
3972 " %s, ignoring", node)
3974 if len(result.payload) != len(dskl):
3975 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3976 " result.payload=%s", node, len(dskl), result.payload)
3977 self.LogWarning("Invalid result from node %s, ignoring node results",
3980 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3982 self.LogWarning("Disk %d of instance %s did not return size"
3983 " information, ignoring", idx, instance.name)
3985 if not isinstance(size, (int, long)):
3986 self.LogWarning("Disk %d of instance %s did not return valid"
3987 " size information, ignoring", idx, instance.name)
3990 if size != disk.size:
3991 self.LogInfo("Disk %d of instance %s has mismatched size,"
3992 " correcting: recorded %d, actual %d", idx,
3993 instance.name, disk.size, size)
3995 self.cfg.Update(instance, feedback_fn)
3996 changed.append((instance.name, idx, size))
3997 if self._EnsureChildSizes(disk):
3998 self.cfg.Update(instance, feedback_fn)
3999 changed.append((instance.name, idx, disk.size))
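# The "changed" list accumulates (instance_name, disk_index, new_size) tuples,
# e.g. (hypothetical) [("instance1.example.com", 0, 10240)] when disk 0 of
# that instance had its recorded size corrected to 10240 MiB.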
4003 class LUClusterRename(LogicalUnit):
4004 """Rename the cluster.
4007 HPATH = "cluster-rename"
4008 HTYPE = constants.HTYPE_CLUSTER
4010 def BuildHooksEnv(self):
4015 "OP_TARGET": self.cfg.GetClusterName(),
4016 "NEW_NAME": self.op.name,
4019 def BuildHooksNodes(self):
4020 """Build hooks nodes.
4023 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4025 def CheckPrereq(self):
4026 """Verify that the passed name is a valid one.
4029 hostname = netutils.GetHostname(name=self.op.name,
4030 family=self.cfg.GetPrimaryIPFamily())
4032 new_name = hostname.name
4033 self.ip = new_ip = hostname.ip
4034 old_name = self.cfg.GetClusterName()
4035 old_ip = self.cfg.GetMasterIP()
4036 if new_name == old_name and new_ip == old_ip:
4037 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4038 " cluster has changed",
4040 if new_ip != old_ip:
4041 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4042 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4043 " reachable on the network" %
4044 new_ip, errors.ECODE_NOTUNIQUE)
4046 self.op.name = new_name
4048 def Exec(self, feedback_fn):
4049 """Rename the cluster.
4052 clustername = self.op.name
4055 # shutdown the master IP
4056 master_params = self.cfg.GetMasterNetworkParameters()
4057 ems = self.cfg.GetUseExternalMipScript()
4058 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4060 result.Raise("Could not disable the master role")
4063 cluster = self.cfg.GetClusterInfo()
4064 cluster.cluster_name = clustername
4065 cluster.master_ip = new_ip
4066 self.cfg.Update(cluster, feedback_fn)
4068 # update the known hosts file
4069 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4070 node_list = self.cfg.GetOnlineNodeList()
4072 node_list.remove(master_params.name)
4075 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4077 master_params.ip = new_ip
4078 result = self.rpc.call_node_activate_master_ip(master_params.name,
4080 msg = result.fail_msg
4082 self.LogWarning("Could not re-enable the master role on"
4083 " the master, please restart manually: %s", msg)
4088 def _ValidateNetmask(cfg, netmask):
4089 """Checks if a netmask is valid.
4091 @type cfg: L{config.ConfigWriter}
4092 @param cfg: The cluster configuration
4094 @param netmask: the netmask to be verified
4095 @raise errors.OpPrereqError: if the validation fails
4098 ip_family = cfg.GetPrimaryIPFamily()
4100 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4101 except errors.ProgrammerError:
4102 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4103 ip_family, errors.ECODE_INVAL)
4104 if not ipcls.ValidateNetmask(netmask):
4105 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4106 (netmask), errors.ECODE_INVAL)
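# Usage sketch (assuming an IPv4 cluster and a ConfigWriter instance "cfg"):
#   _ValidateNetmask(cfg, 24)   # a valid CIDR prefix length, returns None
#   _ValidateNetmask(cfg, 33)   # out of range for IPv4, raises OpPrereqError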
4109 class LUClusterSetParams(LogicalUnit):
4110 """Change the parameters of the cluster.
4113 HPATH = "cluster-modify"
4114 HTYPE = constants.HTYPE_CLUSTER
4117 def CheckArguments(self):
4121 if self.op.uid_pool:
4122 uidpool.CheckUidPool(self.op.uid_pool)
4124 if self.op.add_uids:
4125 uidpool.CheckUidPool(self.op.add_uids)
4127 if self.op.remove_uids:
4128 uidpool.CheckUidPool(self.op.remove_uids)
4130 if self.op.master_netmask is not None:
4131 _ValidateNetmask(self.cfg, self.op.master_netmask)
4133 if self.op.diskparams:
4134 for dt_params in self.op.diskparams.values():
4135 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4137 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4138 except errors.OpPrereqError, err:
4139 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4142 def ExpandNames(self):
4143 # FIXME: in the future maybe other cluster params won't require checking on
4144 # all nodes to be modified.
4145 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4146 # resource locks the right thing, shouldn't it be the BGL instead?
4147 self.needed_locks = {
4148 locking.LEVEL_NODE: locking.ALL_SET,
4149 locking.LEVEL_INSTANCE: locking.ALL_SET,
4150 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4151 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4153 self.share_locks = _ShareAll()
4155 def BuildHooksEnv(self):
4160 "OP_TARGET": self.cfg.GetClusterName(),
4161 "NEW_VG_NAME": self.op.vg_name,
4164 def BuildHooksNodes(self):
4165 """Build hooks nodes.
4168 mn = self.cfg.GetMasterNode()
4171 def CheckPrereq(self):
4172 """Check prerequisites.
4174 This checks whether the given params don't conflict and
4175 if the given volume group is valid.
4178 if self.op.vg_name is not None and not self.op.vg_name:
4179 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4180 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4181 " instances exist", errors.ECODE_INVAL)
4183 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4184 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4185 raise errors.OpPrereqError("Cannot disable drbd helper while"
4186 " drbd-based instances exist",
4189 node_list = self.owned_locks(locking.LEVEL_NODE)
4191 # if vg_name not None, checks given volume group on all nodes
4193 vglist = self.rpc.call_vg_list(node_list)
4194 for node in node_list:
4195 msg = vglist[node].fail_msg
4197 # ignoring down node
4198 self.LogWarning("Error while gathering data on node %s"
4199 " (ignoring node): %s", node, msg)
4201 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4203 constants.MIN_VG_SIZE)
4205 raise errors.OpPrereqError("Error on node '%s': %s" %
4206 (node, vgstatus), errors.ECODE_ENVIRON)
4208 if self.op.drbd_helper:
4209 # checks given drbd helper on all nodes
4210 helpers = self.rpc.call_drbd_helper(node_list)
4211 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4213 self.LogInfo("Not checking drbd helper on offline node %s", node)
4215 msg = helpers[node].fail_msg
4217 raise errors.OpPrereqError("Error checking drbd helper on node"
4218 " '%s': %s" % (node, msg),
4219 errors.ECODE_ENVIRON)
4220 node_helper = helpers[node].payload
4221 if node_helper != self.op.drbd_helper:
4222 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4223 (node, node_helper), errors.ECODE_ENVIRON)
4225 self.cluster = cluster = self.cfg.GetClusterInfo()
4226 # validate params changes
4227 if self.op.beparams:
4228 objects.UpgradeBeParams(self.op.beparams)
4229 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4230 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4232 if self.op.ndparams:
4233 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4234 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4236 # TODO: we need a more general way to handle resetting
4237 # cluster-level parameters to default values
4238 if self.new_ndparams["oob_program"] == "":
4239 self.new_ndparams["oob_program"] = \
4240 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4242 if self.op.hv_state:
4243 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4244 self.cluster.hv_state_static)
4245 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4246 for hv, values in new_hv_state.items())
4248 if self.op.disk_state:
4249 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4250 self.cluster.disk_state_static)
4251 self.new_disk_state = \
4252 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4253 for name, values in svalues.items()))
4254 for storage, svalues in new_disk_state.items())
4257 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4260 all_instances = self.cfg.GetAllInstancesInfo().values()
4262 for group in self.cfg.GetAllNodeGroupsInfo().values():
4263 instances = frozenset([inst for inst in all_instances
4264 if compat.any(node in group.members
4265 for node in inst.all_nodes)])
4266 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4267 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4268 new = _ComputeNewInstanceViolations(ipol,
4269 new_ipolicy, instances)
4271 violations.update(new)
4274 self.LogWarning("After the ipolicy change the following instances"
4275 " violate them: %s",
4276 utils.CommaJoin(utils.NiceSort(violations)))
4278 if self.op.nicparams:
4279 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4280 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4281 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4284 # check all instances for consistency
4285 for instance in self.cfg.GetAllInstancesInfo().values():
4286 for nic_idx, nic in enumerate(instance.nics):
4287 params_copy = copy.deepcopy(nic.nicparams)
4288 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4290 # check parameter syntax
4292 objects.NIC.CheckParameterSyntax(params_filled)
4293 except errors.ConfigurationError, err:
4294 nic_errors.append("Instance %s, nic/%d: %s" %
4295 (instance.name, nic_idx, err))
4297 # if we're moving instances to routed, check that they have an ip
4298 target_mode = params_filled[constants.NIC_MODE]
4299 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4300 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4301 " address" % (instance.name, nic_idx))
4303 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4304 "\n".join(nic_errors), errors.ECODE_INVAL)
4306 # hypervisor list/parameters
4307 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4308 if self.op.hvparams:
4309 for hv_name, hv_dict in self.op.hvparams.items():
4310 if hv_name not in self.new_hvparams:
4311 self.new_hvparams[hv_name] = hv_dict
4313 self.new_hvparams[hv_name].update(hv_dict)
4315 # disk template parameters
4316 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4317 if self.op.diskparams:
4318 for dt_name, dt_params in self.op.diskparams.items():
4319 if dt_name not in self.new_diskparams:
4320 self.new_diskparams[dt_name] = dt_params
4322 self.new_diskparams[dt_name].update(dt_params)
4324 # os hypervisor parameters
4325 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4327 for os_name, hvs in self.op.os_hvp.items():
4328 if os_name not in self.new_os_hvp:
4329 self.new_os_hvp[os_name] = hvs
4331 for hv_name, hv_dict in hvs.items():
4333 # Delete if it exists
4334 self.new_os_hvp[os_name].pop(hv_name, None)
4335 elif hv_name not in self.new_os_hvp[os_name]:
4336 self.new_os_hvp[os_name][hv_name] = hv_dict
4338 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4341 self.new_osp = objects.FillDict(cluster.osparams, {})
4342 if self.op.osparams:
4343 for os_name, osp in self.op.osparams.items():
4344 if os_name not in self.new_osp:
4345 self.new_osp[os_name] = {}
4347 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4350 if not self.new_osp[os_name]:
4351 # we removed all parameters
4352 del self.new_osp[os_name]
4354 # check the parameter validity (remote check)
4355 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4356 os_name, self.new_osp[os_name])
4358 # changes to the hypervisor list
4359 if self.op.enabled_hypervisors is not None:
4360 self.hv_list = self.op.enabled_hypervisors
4361 for hv in self.hv_list:
4362 # if the hypervisor doesn't already exist in the cluster
4363 # hvparams, we initialize it to empty, and then (in both
4364 # cases) we make sure to fill the defaults, as we might not
4365 # have a complete defaults list if the hypervisor wasn't
4367 if hv not in new_hvp:
4369 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4370 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4372 self.hv_list = cluster.enabled_hypervisors
4374 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4375 # either the enabled list has changed, or the parameters have, validate
4376 for hv_name, hv_params in self.new_hvparams.items():
4377 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4378 (self.op.enabled_hypervisors and
4379 hv_name in self.op.enabled_hypervisors)):
4380 # either this is a new hypervisor, or its parameters have changed
4381 hv_class = hypervisor.GetHypervisorClass(hv_name)
4382 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4383 hv_class.CheckParameterSyntax(hv_params)
4384 _CheckHVParams(self, node_list, hv_name, hv_params)
4387 # no need to check any newly-enabled hypervisors, since the
4388 # defaults have already been checked in the above code-block
4389 for os_name, os_hvp in self.new_os_hvp.items():
4390 for hv_name, hv_params in os_hvp.items():
4391 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4392 # we need to fill in the new os_hvp on top of the actual hv_p
4393 cluster_defaults = self.new_hvparams.get(hv_name, {})
4394 new_osp = objects.FillDict(cluster_defaults, hv_params)
4395 hv_class = hypervisor.GetHypervisorClass(hv_name)
4396 hv_class.CheckParameterSyntax(new_osp)
4397 _CheckHVParams(self, node_list, hv_name, new_osp)
4399 if self.op.default_iallocator:
4400 alloc_script = utils.FindFile(self.op.default_iallocator,
4401 constants.IALLOCATOR_SEARCH_PATH,
4403 if alloc_script is None:
4404 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4405 " specified" % self.op.default_iallocator,
4408 def Exec(self, feedback_fn):
4409 """Change the parameters of the cluster.
4412 if self.op.vg_name is not None:
4413 new_volume = self.op.vg_name
4416 if new_volume != self.cfg.GetVGName():
4417 self.cfg.SetVGName(new_volume)
4419 feedback_fn("Cluster LVM configuration already in desired"
4420 " state, not changing")
4421 if self.op.drbd_helper is not None:
4422 new_helper = self.op.drbd_helper
4425 if new_helper != self.cfg.GetDRBDHelper():
4426 self.cfg.SetDRBDHelper(new_helper)
4428 feedback_fn("Cluster DRBD helper already in desired state,"
4430 if self.op.hvparams:
4431 self.cluster.hvparams = self.new_hvparams
4433 self.cluster.os_hvp = self.new_os_hvp
4434 if self.op.enabled_hypervisors is not None:
4435 self.cluster.hvparams = self.new_hvparams
4436 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4437 if self.op.beparams:
4438 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4439 if self.op.nicparams:
4440 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4442 self.cluster.ipolicy = self.new_ipolicy
4443 if self.op.osparams:
4444 self.cluster.osparams = self.new_osp
4445 if self.op.ndparams:
4446 self.cluster.ndparams = self.new_ndparams
4447 if self.op.diskparams:
4448 self.cluster.diskparams = self.new_diskparams
4449 if self.op.hv_state:
4450 self.cluster.hv_state_static = self.new_hv_state
4451 if self.op.disk_state:
4452 self.cluster.disk_state_static = self.new_disk_state
4454 if self.op.candidate_pool_size is not None:
4455 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4456 # we need to update the pool size here, otherwise the save will fail
4457 _AdjustCandidatePool(self, [])
4459 if self.op.maintain_node_health is not None:
4460 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4461 feedback_fn("Note: CONFD was disabled at build time, node health"
4462 " maintenance is not useful (still enabling it)")
4463 self.cluster.maintain_node_health = self.op.maintain_node_health
4465 if self.op.prealloc_wipe_disks is not None:
4466 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4468 if self.op.add_uids is not None:
4469 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4471 if self.op.remove_uids is not None:
4472 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4474 if self.op.uid_pool is not None:
4475 self.cluster.uid_pool = self.op.uid_pool
4477 if self.op.default_iallocator is not None:
4478 self.cluster.default_iallocator = self.op.default_iallocator
4480 if self.op.reserved_lvs is not None:
4481 self.cluster.reserved_lvs = self.op.reserved_lvs
4483 if self.op.use_external_mip_script is not None:
4484 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4486 def helper_os(aname, mods, desc):
4488 lst = getattr(self.cluster, aname)
4489 for key, val in mods:
4490 if key == constants.DDM_ADD:
4492 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4495 elif key == constants.DDM_REMOVE:
4499 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4501 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4503 if self.op.hidden_os:
4504 helper_os("hidden_os", self.op.hidden_os, "hidden")
4506 if self.op.blacklisted_os:
4507 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4509 if self.op.master_netdev:
4510 master_params = self.cfg.GetMasterNetworkParameters()
4511 ems = self.cfg.GetUseExternalMipScript()
4512 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4513 self.cluster.master_netdev)
4514 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4516 result.Raise("Could not disable the master ip")
4517 feedback_fn("Changing master_netdev from %s to %s" %
4518 (master_params.netdev, self.op.master_netdev))
4519 self.cluster.master_netdev = self.op.master_netdev
4521 if self.op.master_netmask:
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4524 result = self.rpc.call_node_change_master_netmask(master_params.name,
4525 master_params.netmask,
4526 self.op.master_netmask,
4528 master_params.netdev)
4530 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4533 self.cluster.master_netmask = self.op.master_netmask
4535 self.cfg.Update(self.cluster, feedback_fn)
4537 if self.op.master_netdev:
4538 master_params = self.cfg.GetMasterNetworkParameters()
4539 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4540 self.op.master_netdev)
4541 ems = self.cfg.GetUseExternalMipScript()
4542 result = self.rpc.call_node_activate_master_ip(master_params.name,
4545 self.LogWarning("Could not re-enable the master ip on"
4546 " the master, please restart manually: %s",
4550 def _UploadHelper(lu, nodes, fname):
4551 """Helper for uploading a file and showing warnings.
4554 if os.path.exists(fname):
4555 result = lu.rpc.call_upload_file(nodes, fname)
4556 for to_node, to_result in result.items():
4557 msg = to_result.fail_msg
4559 msg = ("Copy of file %s to node %s failed: %s" %
4560 (fname, to_node, msg))
4564 def _ComputeAncillaryFiles(cluster, redist):
4565 """Compute files external to Ganeti which need to be consistent.
4567 @type redist: boolean
4568 @param redist: Whether to include files which need to be redistributed
4571 # Compute files for all nodes
4573 pathutils.SSH_KNOWN_HOSTS_FILE,
4574 pathutils.CONFD_HMAC_KEY,
4575 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4576 pathutils.SPICE_CERT_FILE,
4577 pathutils.SPICE_CACERT_FILE,
4578 pathutils.RAPI_USERS_FILE,
4582 # we need to ship at least the RAPI certificate
4583 files_all.add(pathutils.RAPI_CERT_FILE)
4585 files_all.update(pathutils.ALL_CERT_FILES)
4586 files_all.update(ssconf.SimpleStore().GetFileList())
4588 if cluster.modify_etc_hosts:
4589 files_all.add(pathutils.ETC_HOSTS)
4591 if cluster.use_external_mip_script:
4592 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4594 # Files which are optional; these must:
4595 # - be present in one other category as well
4596 # - either exist or not exist on all nodes of that category (mc, vm all)
4598 pathutils.RAPI_USERS_FILE,
4601 # Files which should only be on master candidates
4605 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4609 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4610 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4611 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4613 # Files which should only be on VM-capable nodes
4616 for hv_name in cluster.enabled_hypervisors
4618 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4622 for hv_name in cluster.enabled_hypervisors
4624 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4626 # Filenames in each category must be unique
4627 all_files_set = files_all | files_mc | files_vm
4628 assert (len(all_files_set) ==
4629 sum(map(len, [files_all, files_mc, files_vm]))), \
4630 "Found file listed in more than one file list"
4632 # Optional files must be present in one other category
4633 assert all_files_set.issuperset(files_opt), \
4634 "Optional file not in a different required list"
4636 # This one file should never ever be re-distributed via RPC
4637 assert not (redist and
4638 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4640 return (files_all, files_opt, files_mc, files_vm)
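# Result sketch: the four sets group file names by scope, e.g. (illustrative)
#   files_all -> files every node needs (known_hosts, HMAC key, certificates)
#   files_opt -> files that may legitimately be absent (e.g. the RAPI users file)
#   files_mc  -> master-candidate-only files (the cluster configuration)
#   files_vm  -> hypervisor ancillary files for VM-capable nodes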
4643 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4644 """Distribute additional files which are part of the cluster configuration.
4646 ConfigWriter takes care of distributing the config and ssconf files, but
4647 there are more files which should be distributed to all nodes. This function
4648 makes sure those are copied.
4650 @param lu: calling logical unit
4651 @param additional_nodes: list of nodes not in the config to distribute to
4652 @type additional_vm: boolean
4653 @param additional_vm: whether the additional nodes are vm-capable or not
4656 # Gather target nodes
4657 cluster = lu.cfg.GetClusterInfo()
4658 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4660 online_nodes = lu.cfg.GetOnlineNodeList()
4661 online_set = frozenset(online_nodes)
4662 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4664 if additional_nodes is not None:
4665 online_nodes.extend(additional_nodes)
4667 vm_nodes.extend(additional_nodes)
4669 # Never distribute to master node
4670 for nodelist in [online_nodes, vm_nodes]:
4671 if master_info.name in nodelist:
4672 nodelist.remove(master_info.name)
4675 (files_all, _, files_mc, files_vm) = \
4676 _ComputeAncillaryFiles(cluster, True)
4678 # Never re-distribute configuration file from here
4679 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4680 pathutils.CLUSTER_CONF_FILE in files_vm)
4681 assert not files_mc, "Master candidates not handled in this function"
4684 (online_nodes, files_all),
4685 (vm_nodes, files_vm),
4689 for (node_list, files) in filemap:
4691 _UploadHelper(lu, node_list, fname)
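# filemap pairs each target node list with the files it must receive: the
# common ancillary files go to every online node (minus the master, removed
# above), while the hypervisor-specific files go only to VM-capable nodes.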
4694 class LUClusterRedistConf(NoHooksLU):
4695 """Force the redistribution of cluster configuration.
4697 This is a very simple LU.
4702 def ExpandNames(self):
4703 self.needed_locks = {
4704 locking.LEVEL_NODE: locking.ALL_SET,
4705 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4707 self.share_locks = _ShareAll()
4709 def Exec(self, feedback_fn):
4710 """Redistribute the configuration.
4713 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4714 _RedistributeAncillaryFiles(self)
4717 class LUClusterActivateMasterIp(NoHooksLU):
4718 """Activate the master IP on the master node.
4721 def Exec(self, feedback_fn):
4722 """Activate the master IP.
4725 master_params = self.cfg.GetMasterNetworkParameters()
4726 ems = self.cfg.GetUseExternalMipScript()
4727 result = self.rpc.call_node_activate_master_ip(master_params.name,
4729 result.Raise("Could not activate the master IP")
4732 class LUClusterDeactivateMasterIp(NoHooksLU):
4733 """Deactivate the master IP on the master node.
4736 def Exec(self, feedback_fn):
4737 """Deactivate the master IP.
4740 master_params = self.cfg.GetMasterNetworkParameters()
4741 ems = self.cfg.GetUseExternalMipScript()
4742 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4744 result.Raise("Could not deactivate the master IP")
4747 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4748 """Sleep and poll for an instance's disk to sync.
4751 if not instance.disks or disks is not None and not disks:
4754 disks = _ExpandCheckDisks(instance, disks)
4757 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4759 node = instance.primary_node
4762 lu.cfg.SetDiskID(dev, node)
4764 # TODO: Convert to utils.Retry
4767 degr_retries = 10 # in seconds, as we sleep 1 second each time
4771 cumul_degraded = False
4772 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4773 msg = rstats.fail_msg
4775 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4778 raise errors.RemoteError("Can't contact node %s for mirror data,"
4779 " aborting." % node)
4782 rstats = rstats.payload
4784 for i, mstat in enumerate(rstats):
4786 lu.LogWarning("Can't compute data for node %s/%s",
4787 node, disks[i].iv_name)
4790 cumul_degraded = (cumul_degraded or
4791 (mstat.is_degraded and mstat.sync_percent is None))
4792 if mstat.sync_percent is not None:
4794 if mstat.estimated_time is not None:
4795 rem_time = ("%s remaining (estimated)" %
4796 utils.FormatSeconds(mstat.estimated_time))
4797 max_time = mstat.estimated_time
4799 rem_time = "no time estimate"
4800 lu.LogInfo("- device %s: %5.2f%% done, %s",
4801 disks[i].iv_name, mstat.sync_percent, rem_time)
4803 # if we're done but degraded, let's do a few small retries, to
4804 # make sure we see a stable and not transient situation; therefore
4805 # we force restart of the loop
4806 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4807 logging.info("Degraded disks found, %d retries left", degr_retries)
4815 time.sleep(min(60, max_time))
4818 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4820 return not cumul_degraded
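# Usage sketch (hypothetical caller): an instance-creating LU might do
#   disk_abort = not _WaitForSync(self, instance)
#   if disk_abort:
#     raise errors.OpExecError("Degraded disks found for this instance")
# With oneshot=True the status is polled and reported once (apart from the
# degraded-state retries) instead of waiting for the sync to complete.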
4823 def _BlockdevFind(lu, node, dev, instance):
4824 """Wrapper around call_blockdev_find to annotate diskparams.
4826 @param lu: A reference to the lu object
4827 @param node: The node to call out
4828 @param dev: The device to find
4829 @param instance: The instance object the device belongs to
4830 @returns The result of the rpc call
4833 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4834 return lu.rpc.call_blockdev_find(node, disk)
4837 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4838 """Wrapper around L{_CheckDiskConsistencyInner}.
4841 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4842 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4846 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4848 """Check that mirrors are not degraded.
4850 @attention: The device has to be annotated already.
4852 The ldisk parameter, if True, will change the test from the
4853 is_degraded attribute (which represents overall non-ok status for
4854 the device(s)) to the ldisk (representing the local storage status).
4857 lu.cfg.SetDiskID(dev, node)
4861 if on_primary or dev.AssembleOnSecondary():
4862 rstats = lu.rpc.call_blockdev_find(node, dev)
4863 msg = rstats.fail_msg
4865 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4867 elif not rstats.payload:
4868 lu.LogWarning("Can't find disk on node %s", node)
4872 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4874 result = result and not rstats.payload.is_degraded
4877 for child in dev.children:
4878 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4884 class LUOobCommand(NoHooksLU):
4885 """Logical unit for OOB handling.
4889 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4891 def ExpandNames(self):
4892 """Gather locks we need.
4895 if self.op.node_names:
4896 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4897 lock_names = self.op.node_names
4899 lock_names = locking.ALL_SET
4901 self.needed_locks = {
4902 locking.LEVEL_NODE: lock_names,
4905 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4907 if not self.op.node_names:
4908 # Acquire node allocation lock only if all nodes are affected
4909 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4911 def CheckPrereq(self):
4912 """Check prerequisites.
4915 - the node exists in the configuration
4918 Any errors are signaled by raising errors.OpPrereqError.
4922 self.master_node = self.cfg.GetMasterNode()
4924 assert self.op.power_delay >= 0.0
4926 if self.op.node_names:
4927 if (self.op.command in self._SKIP_MASTER and
4928 self.master_node in self.op.node_names):
4929 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4930 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4932 if master_oob_handler:
4933 additional_text = ("run '%s %s %s' if you want to operate on the"
4934 " master regardless") % (master_oob_handler,
4938 additional_text = "it does not support out-of-band operations"
4940 raise errors.OpPrereqError(("Operating on the master node %s is not"
4941 " allowed for %s; %s") %
4942 (self.master_node, self.op.command,
4943 additional_text), errors.ECODE_INVAL)
4945 self.op.node_names = self.cfg.GetNodeList()
4946 if self.op.command in self._SKIP_MASTER:
4947 self.op.node_names.remove(self.master_node)
4949 if self.op.command in self._SKIP_MASTER:
4950 assert self.master_node not in self.op.node_names
4952 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4954 raise errors.OpPrereqError("Node %s not found" % node_name,
4957 self.nodes.append(node)
4959 if (not self.op.ignore_status and
4960 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4961 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4962 " not marked offline") % node_name,
4965 def Exec(self, feedback_fn):
4966 """Execute OOB and return result if we expect any.
4969 master_node = self.master_node
4972 for idx, node in enumerate(utils.NiceSort(self.nodes,
4973 key=lambda node: node.name)):
4974 node_entry = [(constants.RS_NORMAL, node.name)]
4975 ret.append(node_entry)
4977 oob_program = _SupportsOob(self.cfg, node)
4980 node_entry.append((constants.RS_UNAVAIL, None))
4983 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4984 self.op.command, oob_program, node.name)
4985 result = self.rpc.call_run_oob(master_node, oob_program,
4986 self.op.command, node.name,
4990 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4991 node.name, result.fail_msg)
4992 node_entry.append((constants.RS_NODATA, None))
4995 self._CheckPayload(result)
4996 except errors.OpExecError, err:
4997 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4999 node_entry.append((constants.RS_NODATA, None))
5001 if self.op.command == constants.OOB_HEALTH:
5002 # For health we should log important events
5003 for item, status in result.payload:
5004 if status in [constants.OOB_STATUS_WARNING,
5005 constants.OOB_STATUS_CRITICAL]:
5006 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5007 item, node.name, status)
5009 if self.op.command == constants.OOB_POWER_ON:
5011 elif self.op.command == constants.OOB_POWER_OFF:
5012 node.powered = False
5013 elif self.op.command == constants.OOB_POWER_STATUS:
5014 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5015 if powered != node.powered:
5016 logging.warning(("Recorded power state (%s) of node '%s' does not"
5017 " match actual power state (%s)"), node.powered,
5020 # For configuration changing commands we should update the node
5021 if self.op.command in (constants.OOB_POWER_ON,
5022 constants.OOB_POWER_OFF):
5023 self.cfg.Update(node, feedback_fn)
5025 node_entry.append((constants.RS_NORMAL, result.payload))
5027 if (self.op.command == constants.OOB_POWER_ON and
5028 idx < len(self.nodes) - 1):
5029 time.sleep(self.op.power_delay)
5033 def _CheckPayload(self, result):
5034 """Checks if the payload is valid.
5036 @param result: RPC result
5037 @raises errors.OpExecError: If payload is not valid
5041 if self.op.command == constants.OOB_HEALTH:
5042 if not isinstance(result.payload, list):
5043 errs.append("command 'health' is expected to return a list but got %s" %
5044 type(result.payload))
5046 for item, status in result.payload:
5047 if status not in constants.OOB_STATUSES:
5048 errs.append("health item '%s' has invalid status '%s'" %
5051 if self.op.command == constants.OOB_POWER_STATUS:
5052 if not isinstance(result.payload, dict):
5053 errs.append("power-status is expected to return a dict but got %s" %
5054 type(result.payload))
5056 if self.op.command in [
5057 constants.OOB_POWER_ON,
5058 constants.OOB_POWER_OFF,
5059 constants.OOB_POWER_CYCLE,
5061 if result.payload is not None:
5062 errs.append("%s is expected to not return payload but got '%s'" %
5063 (self.op.command, result.payload))
5066 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5067 utils.CommaJoin(errs))
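# Expected payload shapes, for illustration:
#   "health"                  -> a list of (item, status) pairs, each status
#                                being one of constants.OOB_STATUSES
#   "power-status"            -> a dict (Exec reads its "powered" entry)
#   "power-on"/"power-off"/"power-cycle" -> no payload at all (None)
# Anything else adds an error string above and raises errors.OpExecError.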
5070 class _OsQuery(_QueryBase):
5071 FIELDS = query.OS_FIELDS
5073 def ExpandNames(self, lu):
5074 # Lock all nodes in shared mode
5075 # Temporary removal of locks, should be reverted later
5076 # TODO: reintroduce locks when they are lighter-weight
5077 lu.needed_locks = {}
5078 #self.share_locks[locking.LEVEL_NODE] = 1
5079 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5081 # The following variables interact with _QueryBase._GetNames
5083 self.wanted = self.names
5085 self.wanted = locking.ALL_SET
5087 self.do_locking = self.use_locking
5089 def DeclareLocks(self, lu, level):
5093 def _DiagnoseByOS(rlist):
5094 """Remaps a per-node return list into an a per-os per-node dictionary
5096 @param rlist: a map with node names as keys and OS objects as values
5099 @return: a dictionary with osnames as keys and as value another
5100 map, with nodes as keys and tuples of (path, status, diagnose,
5101 variants, parameters, api_versions) as values, eg::
5103 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5104 (/srv/..., False, "invalid api")],
5105 "node2": [(/srv/..., True, "", [], [])]}
5110 # we build here the list of nodes that didn't fail the RPC (at RPC
5111 # level), so that nodes with a non-responding node daemon don't
5112 # make all OSes invalid
5113 good_nodes = [node_name for node_name in rlist
5114 if not rlist[node_name].fail_msg]
5115 for node_name, nr in rlist.items():
5116 if nr.fail_msg or not nr.payload:
5118 for (name, path, status, diagnose, variants,
5119 params, api_versions) in nr.payload:
5120 if name not in all_os:
5121 # build a list of nodes for this os containing empty lists
5122 # for each node in node_list
5124 for nname in good_nodes:
5125 all_os[name][nname] = []
5126 # convert params from [name, help] to (name, help)
5127 params = [tuple(v) for v in params]
5128 all_os[name][node_name].append((path, status, diagnose,
5129 variants, params, api_versions))
5132 def _GetQueryData(self, lu):
5133 """Computes the list of nodes and their attributes.
5136 # Locking is not used
5137 assert not (compat.any(lu.glm.is_owned(level)
5138 for level in locking.LEVELS
5139 if level != locking.LEVEL_CLUSTER) or
5140 self.do_locking or self.use_locking)
5142 valid_nodes = [node.name
5143 for node in lu.cfg.GetAllNodesInfo().values()
5144 if not node.offline and node.vm_capable]
5145 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5146 cluster = lu.cfg.GetClusterInfo()
5150 for (os_name, os_data) in pol.items():
5151 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5152 hidden=(os_name in cluster.hidden_os),
5153 blacklisted=(os_name in cluster.blacklisted_os))
5157 api_versions = set()
5159 for idx, osl in enumerate(os_data.values()):
5160 info.valid = bool(info.valid and osl and osl[0][1])
5164 (node_variants, node_params, node_api) = osl[0][3:6]
5167 variants.update(node_variants)
5168 parameters.update(node_params)
5169 api_versions.update(node_api)
5171 # Filter out inconsistent values
5172 variants.intersection_update(node_variants)
5173 parameters.intersection_update(node_params)
5174 api_versions.intersection_update(node_api)
5176 info.variants = list(variants)
5177 info.parameters = list(parameters)
5178 info.api_versions = list(api_versions)
5180 data[os_name] = info
5182 # Prepare data in requested order
5183 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5187 class LUOsDiagnose(NoHooksLU):
5188 """Logical unit for OS diagnose/query.
5194 def _BuildFilter(fields, names):
5195 """Builds a filter for querying OSes.
5198 name_filter = qlang.MakeSimpleFilter("name", names)
5200 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5201 # respective field is not requested
5202 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5203 for fname in ["hidden", "blacklisted"]
5204 if fname not in fields]
5205 if "valid" not in fields:
5206 status_filter.append([qlang.OP_TRUE, "valid"])
5209 status_filter.insert(0, qlang.OP_AND)
5211 status_filter = None
5213 if name_filter and status_filter:
5214 return [qlang.OP_AND, name_filter, status_filter]
5218 return status_filter
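# Example (hypothetical arguments): _BuildFilter(["name", "valid"], ["lenny"])
# returns roughly (qlang constants shown by name)
#   [OP_AND, [OP_OR, [OP_EQUAL, "name", "lenny"]],
#            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                     [OP_NOT, [OP_TRUE, "blacklisted"]]]]
# i.e. hidden/blacklisted OSes are filtered out because those fields were not
# requested, while no "valid" clause is added since "valid" was requested.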
5220 def CheckArguments(self):
5221 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5222 self.op.output_fields, False)
5224 def ExpandNames(self):
5225 self.oq.ExpandNames(self)
5227 def Exec(self, feedback_fn):
5228 return self.oq.OldStyleQuery(self)
5231 class _ExtStorageQuery(_QueryBase):
5232 FIELDS = query.EXTSTORAGE_FIELDS
5234 def ExpandNames(self, lu):
5235 # Lock all nodes in shared mode
5236 # Temporary removal of locks, should be reverted later
5237 # TODO: reintroduce locks when they are lighter-weight
5238 lu.needed_locks = {}
5239 #self.share_locks[locking.LEVEL_NODE] = 1
5240 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5242 # The following variables interact with _QueryBase._GetNames
5244 self.wanted = self.names
5246 self.wanted = locking.ALL_SET
5248 self.do_locking = self.use_locking
5250 def DeclareLocks(self, lu, level):
5254 def _DiagnoseByProvider(rlist):
5255 """Remaps a per-node return list into an a per-provider per-node dictionary
5257 @param rlist: a map with node names as keys and ExtStorage objects as values
5260 @return: a dictionary with extstorage providers as keys and as
5261 value another map, with nodes as keys and tuples of
5262 (path, status, diagnose, parameters) as values, eg::
5264 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5265 "node2": [(/srv/..., False, "missing file")]
5266 "node3": [(/srv/..., True, "", [])]
5271 # we build here the list of nodes that didn't fail the RPC (at RPC
5272 # level), so that nodes with a non-responding node daemon don't
5273 # make all providers invalid
5274 good_nodes = [node_name for node_name in rlist
5275 if not rlist[node_name].fail_msg]
5276 for node_name, nr in rlist.items():
5277 if nr.fail_msg or not nr.payload:
5279 for (name, path, status, diagnose, params) in nr.payload:
5280 if name not in all_es:
5281 # build a list of nodes for this provider containing empty lists
5282 # for each node in node_list
5284 for nname in good_nodes:
5285 all_es[name][nname] = []
5286 # convert params from [name, help] to (name, help)
5287 params = [tuple(v) for v in params]
5288 all_es[name][node_name].append((path, status, diagnose, params))
5291 def _GetQueryData(self, lu):
5292 """Computes the list of nodes and their attributes.
5295 # Locking is not used
5296 assert not (compat.any(lu.glm.is_owned(level)
5297 for level in locking.LEVELS
5298 if level != locking.LEVEL_CLUSTER) or
5299 self.do_locking or self.use_locking)
5301 valid_nodes = [node.name
5302 for node in lu.cfg.GetAllNodesInfo().values()
5303 if not node.offline and node.vm_capable]
5304 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5308 nodegroup_list = lu.cfg.GetNodeGroupList()
5310 for (es_name, es_data) in pol.items():
5311 # For every provider compute the nodegroup validity.
5312 # To do this we need to check the validity of each node in es_data
5313 # and then construct the corresponding nodegroup dict:
5314 # { nodegroup1: status
5315 # nodegroup2: status
5318 for nodegroup in nodegroup_list:
5319 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5321 nodegroup_nodes = ndgrp.members
5322 nodegroup_name = ndgrp.name
5325 for node in nodegroup_nodes:
5326 if node in valid_nodes:
5327 if es_data[node] != []:
5328 node_status = es_data[node][0][1]
5329 node_statuses.append(node_status)
5331 node_statuses.append(False)
5333 if False in node_statuses:
5334 ndgrp_data[nodegroup_name] = False
5336 ndgrp_data[nodegroup_name] = True
5338 # Compute the provider's parameters
5340 for idx, esl in enumerate(es_data.values()):
5341 valid = bool(esl and esl[0][1])
5345 node_params = esl[0][3]
5348 parameters.update(node_params)
5350 # Filter out inconsistent values
5351 parameters.intersection_update(node_params)
5353 params = list(parameters)
5355 # Now fill all the info for this provider
5356 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5357 nodegroup_status=ndgrp_data,
5360 data[es_name] = info
5362 # Prepare data in requested order
5363 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5367 class LUExtStorageDiagnose(NoHooksLU):
5368 """Logical unit for ExtStorage diagnose/query.
5373 def CheckArguments(self):
5374 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5375 self.op.output_fields, False)
5377 def ExpandNames(self):
5378 self.eq.ExpandNames(self)
5380 def Exec(self, feedback_fn):
5381 return self.eq.OldStyleQuery(self)
5384 class LUNodeRemove(LogicalUnit):
5385 """Logical unit for removing a node.
5388 HPATH = "node-remove"
5389 HTYPE = constants.HTYPE_NODE
5391 def BuildHooksEnv(self):
5396 "OP_TARGET": self.op.node_name,
5397 "NODE_NAME": self.op.node_name,
5400 def BuildHooksNodes(self):
5401 """Build hooks nodes.
5403 This doesn't run on the target node in the pre phase as a failed
5404 node would then be impossible to remove.
5407 all_nodes = self.cfg.GetNodeList()
5409 all_nodes.remove(self.op.node_name)
5412 return (all_nodes, all_nodes)
5414 def CheckPrereq(self):
5415 """Check prerequisites.
5418 - the node exists in the configuration
5419 - it does not have primary or secondary instances
5420 - it's not the master
5422 Any errors are signaled by raising errors.OpPrereqError.
5425 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5426 node = self.cfg.GetNodeInfo(self.op.node_name)
5427 assert node is not None
5429 masternode = self.cfg.GetMasterNode()
5430 if node.name == masternode:
5431 raise errors.OpPrereqError("Node is the master node, failover to another"
5432 " node is required", errors.ECODE_INVAL)
5434 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5435 if node.name in instance.all_nodes:
5436 raise errors.OpPrereqError("Instance %s is still running on the node,"
5437 " please remove first" % instance_name,
5439 self.op.node_name = node.name
5442 def Exec(self, feedback_fn):
5443 """Removes the node from the cluster.
5447 logging.info("Stopping the node daemon and removing configs from node %s",
5450 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5452 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5455 # Promote nodes to master candidate as needed
5456 _AdjustCandidatePool(self, exceptions=[node.name])
5457 self.context.RemoveNode(node.name)
5459 # Run post hooks on the node before it's removed
5460 _RunPostHook(self, node.name)
5462 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5463 msg = result.fail_msg
5465 self.LogWarning("Errors encountered on the remote node while leaving"
5466 " the cluster: %s", msg)
5468 # Remove node from our /etc/hosts
5469 if self.cfg.GetClusterInfo().modify_etc_hosts:
5470 master_node = self.cfg.GetMasterNode()
5471 result = self.rpc.call_etc_hosts_modify(master_node,
5472 constants.ETC_HOSTS_REMOVE,
5474 result.Raise("Can't update hosts file with new host data")
5475 _RedistributeAncillaryFiles(self)
5478 class _NodeQuery(_QueryBase):
5479 FIELDS = query.NODE_FIELDS
5481 def ExpandNames(self, lu):
5482 lu.needed_locks = {}
5483 lu.share_locks = _ShareAll()
5486 self.wanted = _GetWantedNodes(lu, self.names)
5488 self.wanted = locking.ALL_SET
5490 self.do_locking = (self.use_locking and
5491 query.NQ_LIVE in self.requested_data)
5494 # If any non-static field is requested we need to lock the nodes
5495 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5496 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5498 def DeclareLocks(self, lu, level):
5501 def _GetQueryData(self, lu):
5502 """Computes the list of nodes and their attributes.
5505 all_info = lu.cfg.GetAllNodesInfo()
5507 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5509 # Gather data as requested
5510 if query.NQ_LIVE in self.requested_data:
5511 # filter out non-vm_capable nodes
5512 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5514 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5515 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5516 [lu.cfg.GetHypervisorType()], es_flags)
5517 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5518 for (name, nresult) in node_data.items()
5519 if not nresult.fail_msg and nresult.payload)
5523 if query.NQ_INST in self.requested_data:
5524 node_to_primary = dict([(name, set()) for name in nodenames])
5525 node_to_secondary = dict([(name, set()) for name in nodenames])
5527 inst_data = lu.cfg.GetAllInstancesInfo()
5529 for inst in inst_data.values():
5530 if inst.primary_node in node_to_primary:
5531 node_to_primary[inst.primary_node].add(inst.name)
5532 for secnode in inst.secondary_nodes:
5533 if secnode in node_to_secondary:
5534 node_to_secondary[secnode].add(inst.name)
5536 node_to_primary = None
5537 node_to_secondary = None
5539 if query.NQ_OOB in self.requested_data:
5540 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5541 for name, node in all_info.iteritems())
5545 if query.NQ_GROUP in self.requested_data:
5546 groups = lu.cfg.GetAllNodeGroupsInfo()
5550 return query.NodeQueryData([all_info[name] for name in nodenames],
5551 live_data, lu.cfg.GetMasterNode(),
5552 node_to_primary, node_to_secondary, groups,
5553 oob_support, lu.cfg.GetClusterInfo())
5556 class LUNodeQuery(NoHooksLU):
5557 """Logical unit for querying nodes.
5560 # pylint: disable=W0142
5563 def CheckArguments(self):
5564 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5565 self.op.output_fields, self.op.use_locking)
5567 def ExpandNames(self):
5568 self.nq.ExpandNames(self)
5570 def DeclareLocks(self, level):
5571 self.nq.DeclareLocks(self, level)
5573 def Exec(self, feedback_fn):
5574 return self.nq.OldStyleQuery(self)
5577 class LUNodeQueryvols(NoHooksLU):
5578 """Logical unit for getting volumes on node(s).
5582 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5583 _FIELDS_STATIC = utils.FieldSet("node")
5585 def CheckArguments(self):
5586 _CheckOutputFields(static=self._FIELDS_STATIC,
5587 dynamic=self._FIELDS_DYNAMIC,
5588 selected=self.op.output_fields)
5590 def ExpandNames(self):
5591 self.share_locks = _ShareAll()
5594 self.needed_locks = {
5595 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5598 self.needed_locks = {
5599 locking.LEVEL_NODE: locking.ALL_SET,
5600 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5603 def Exec(self, feedback_fn):
5604 """Computes the list of nodes and their attributes.
5607 nodenames = self.owned_locks(locking.LEVEL_NODE)
5608 volumes = self.rpc.call_node_volumes(nodenames)
5610 ilist = self.cfg.GetAllInstancesInfo()
5611 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5614 for node in nodenames:
5615 nresult = volumes[node]
5618 msg = nresult.fail_msg
5620 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5623 node_vols = sorted(nresult.payload,
5624 key=operator.itemgetter("dev"))
5626 for vol in node_vols:
5628 for field in self.op.output_fields:
5631 elif field == "phys":
5635 elif field == "name":
5637 elif field == "size":
5638 val = int(float(vol["size"]))
5639 elif field == "instance":
5640 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5642 raise errors.ParameterError(field)
5643 node_output.append(str(val))
5645 output.append(node_output)
5650 class LUNodeQueryStorage(NoHooksLU):
5651 """Logical unit for getting information on storage units on node(s).
5654 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5657 def CheckArguments(self):
5658 _CheckOutputFields(static=self._FIELDS_STATIC,
5659 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5660 selected=self.op.output_fields)
5662 def ExpandNames(self):
5663 self.share_locks = _ShareAll()
5666 self.needed_locks = {
5667 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5670 self.needed_locks = {
5671 locking.LEVEL_NODE: locking.ALL_SET,
5672 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5675 def Exec(self, feedback_fn):
5676 """Computes the list of nodes and their attributes.
5679 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5681 # Always get name to sort by
5682 if constants.SF_NAME in self.op.output_fields:
5683 fields = self.op.output_fields[:]
5685 fields = [constants.SF_NAME] + self.op.output_fields
5687 # Never ask for node or type as it's only known to the LU
5688 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5689 while extra in fields:
5690 fields.remove(extra)
5692 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5693 name_idx = field_idx[constants.SF_NAME]
5695 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5696 data = self.rpc.call_storage_list(self.nodes,
5697 self.op.storage_type, st_args,
5698 self.op.name, fields)
5702 for node in utils.NiceSort(self.nodes):
5703 nresult = data[node]
5707 msg = nresult.fail_msg
5709 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5712 rows = dict([(row[name_idx], row) for row in nresult.payload])
5714 for name in utils.NiceSort(rows.keys()):
5719 for field in self.op.output_fields:
5720 if field == constants.SF_NODE:
5722 elif field == constants.SF_TYPE:
5723 val = self.op.storage_type
5724 elif field in field_idx:
5725 val = row[field_idx[field]]
5727 raise errors.ParameterError(field)
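# Illustrative sketch (not part of the original module): the field-index
# dispatch used by the storage query above, shown with plain lists. The
# literal "node" and "type" fields stand in for constants.SF_NODE and
# constants.SF_TYPE; all names here are hypothetical.
def _ExampleSelectStorageFields(fields, row, node_name, storage_type,
                                output_fields):
  """Pick the requested columns out of one storage row (sketch only)."""
  # map each known field name to its column index in the row
  field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
  out = []
  for field in output_fields:
    if field == "node":
      val = node_name
    elif field == "type":
      val = storage_type
    elif field in field_idx:
      val = row[field_idx[field]]
    else:
      raise ValueError("Unknown output field %r" % field)
    out.append(str(val))
  return out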
5736 class _InstanceQuery(_QueryBase):
5737 FIELDS = query.INSTANCE_FIELDS
5739 def ExpandNames(self, lu):
5740 lu.needed_locks = {}
5741 lu.share_locks = _ShareAll()
5744 self.wanted = _GetWantedInstances(lu, self.names)
5746 self.wanted = locking.ALL_SET
5748 self.do_locking = (self.use_locking and
5749 query.IQ_LIVE in self.requested_data)
5751 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5752 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5753 lu.needed_locks[locking.LEVEL_NODE] = []
5754 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5756 self.do_grouplocks = (self.do_locking and
5757 query.IQ_NODES in self.requested_data)
5759 def DeclareLocks(self, lu, level):
5761 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5762 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5764 # Lock all groups used by instances optimistically; this requires going
5765 # via the node before it's locked, requiring verification later on
5766 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5768 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5769 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5770 elif level == locking.LEVEL_NODE:
5771 lu._LockInstancesNodes() # pylint: disable=W0212
5774 def _CheckGroupLocks(lu):
5775 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5776 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5778 # Check if node groups for locked instances are still correct
5779 for instance_name in owned_instances:
5780 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5782 def _GetQueryData(self, lu):
5783 """Computes the list of instances and their attributes.
5786 if self.do_grouplocks:
5787 self._CheckGroupLocks(lu)
5789 cluster = lu.cfg.GetClusterInfo()
5790 all_info = lu.cfg.GetAllInstancesInfo()
5792 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5794 instance_list = [all_info[name] for name in instance_names]
5795 nodes = frozenset(itertools.chain(*(inst.all_nodes
5796 for inst in instance_list)))
5797 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5800 wrongnode_inst = set()
5802 # Gather data as requested
5803 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5805 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5807 result = node_data[name]
5809 # offline nodes will be in both lists
5810 assert result.fail_msg
5811 offline_nodes.append(name)
5813 bad_nodes.append(name)
5814 elif result.payload:
5815 for inst in result.payload:
5816 if inst in all_info:
5817 if all_info[inst].primary_node == name:
5818 live_data.update(result.payload)
5820 wrongnode_inst.add(inst)
5822 # orphan instance; we don't list it here as we don't
5823 # handle this case yet in the output of instance listing
5824 logging.warning("Orphan instance '%s' found on node %s",
5826 # else no instance is alive
5830 if query.IQ_DISKUSAGE in self.requested_data:
5831 gmi = ganeti.masterd.instance
5832 disk_usage = dict((inst.name,
5833 gmi.ComputeDiskSize(inst.disk_template,
5834 [{constants.IDISK_SIZE: disk.size}
5835 for disk in inst.disks]))
5836 for inst in instance_list)
5840 if query.IQ_CONSOLE in self.requested_data:
5842 for inst in instance_list:
5843 if inst.name in live_data:
5844 # Instance is running
5845 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5847 consinfo[inst.name] = None
5848 assert set(consinfo.keys()) == set(instance_names)
5852 if query.IQ_NODES in self.requested_data:
5853 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5855 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5856 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5857 for uuid in set(map(operator.attrgetter("group"),
5863 if query.IQ_NETWORKS in self.requested_data:
5864 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5865 for i in instance_list))
5866 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5870 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5871 disk_usage, offline_nodes, bad_nodes,
5872 live_data, wrongnode_inst, consinfo,
5873 nodes, groups, networks)
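# Illustrative sketch (not part of the original module): how live runtime
# data and "wrong node" instances can be separated, given per-node payloads
# of running instances and the configured primary node of each instance.
# The dictionaries are hypothetical stand-ins for the RPC results used above.
def _ExampleSplitLiveData(node_payloads, configured_primary):
  """Split runtime info into live data and wrongly-placed instances."""
  live_data = {}
  wrongnode_inst = set()
  for (node, payload) in node_payloads.items():
    for (inst, info) in payload.items():
      if inst not in configured_primary:
        # orphan instance, not listed here
        continue
      if configured_primary[inst] == node:
        live_data[inst] = info
      else:
        wrongnode_inst.add(inst)
  return (live_data, wrongnode_inst)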
5876 class LUQuery(NoHooksLU):
5877 """Query for resources/items of a certain kind.
5880 # pylint: disable=W0142
5883 def CheckArguments(self):
5884 qcls = _GetQueryImplementation(self.op.what)
5886 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5888 def ExpandNames(self):
5889 self.impl.ExpandNames(self)
5891 def DeclareLocks(self, level):
5892 self.impl.DeclareLocks(self, level)
5894 def Exec(self, feedback_fn):
5895 return self.impl.NewStyleQuery(self)
5898 class LUQueryFields(NoHooksLU):
5899 """Query for resources/items of a certain kind.
5902 # pylint: disable=W0142
5905 def CheckArguments(self):
5906 self.qcls = _GetQueryImplementation(self.op.what)
5908 def ExpandNames(self):
5909 self.needed_locks = {}
5911 def Exec(self, feedback_fn):
5912 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5915 class LUNodeModifyStorage(NoHooksLU):
5916 """Logical unit for modifying a storage volume on a node.
5921 def CheckArguments(self):
5922 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5924 storage_type = self.op.storage_type
5927 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5929 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5930 " modified" % storage_type,
5933 diff = set(self.op.changes.keys()) - modifiable
5935 raise errors.OpPrereqError("The following fields can not be modified for"
5936 " storage units of type '%s': %r" %
5937 (storage_type, list(diff)),
5940 def ExpandNames(self):
5941 self.needed_locks = {
5942 locking.LEVEL_NODE: self.op.node_name,
5945 def Exec(self, feedback_fn):
5946 """Computes the list of nodes and their attributes.
5949 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5950 result = self.rpc.call_storage_modify(self.op.node_name,
5951 self.op.storage_type, st_args,
5952 self.op.name, self.op.changes)
5953 result.Raise("Failed to modify storage unit '%s' on %s" %
5954 (self.op.name, self.op.node_name))
5957 class LUNodeAdd(LogicalUnit):
5958 """Logical unit for adding node to the cluster.
5962 HTYPE = constants.HTYPE_NODE
5963 _NFLAGS = ["master_capable", "vm_capable"]
5965 def CheckArguments(self):
5966 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5967 # validate/normalize the node name
5968 self.hostname = netutils.GetHostname(name=self.op.node_name,
5969 family=self.primary_ip_family)
5970 self.op.node_name = self.hostname.name
5972 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5973 raise errors.OpPrereqError("Cannot readd the master node",
5976 if self.op.readd and self.op.group:
5977 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5978 " being readded", errors.ECODE_INVAL)
5980 def BuildHooksEnv(self):
5983 This will run on all nodes before, and on all nodes + the new node after.
5987 "OP_TARGET": self.op.node_name,
5988 "NODE_NAME": self.op.node_name,
5989 "NODE_PIP": self.op.primary_ip,
5990 "NODE_SIP": self.op.secondary_ip,
5991 "MASTER_CAPABLE": str(self.op.master_capable),
5992 "VM_CAPABLE": str(self.op.vm_capable),
5995 def BuildHooksNodes(self):
5996 """Build hooks nodes.
5999 # Exclude added node
6000 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6001 post_nodes = pre_nodes + [self.op.node_name, ]
6003 return (pre_nodes, post_nodes)
6005 def CheckPrereq(self):
6006 """Check prerequisites.
6009 - the new node is not already in the config
6011 - its parameters (single/dual homed) match the cluster
6013 Any errors are signaled by raising errors.OpPrereqError.
6017 hostname = self.hostname
6018 node = hostname.name
6019 primary_ip = self.op.primary_ip = hostname.ip
6020 if self.op.secondary_ip is None:
6021 if self.primary_ip_family == netutils.IP6Address.family:
6022 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6023 " IPv4 address must be given as secondary",
6025 self.op.secondary_ip = primary_ip
6027 secondary_ip = self.op.secondary_ip
6028 if not netutils.IP4Address.IsValid(secondary_ip):
6029 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6030 " address" % secondary_ip, errors.ECODE_INVAL)
6032 node_list = cfg.GetNodeList()
6033 if not self.op.readd and node in node_list:
6034 raise errors.OpPrereqError("Node %s is already in the configuration" %
6035 node, errors.ECODE_EXISTS)
6036 elif self.op.readd and node not in node_list:
6037 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6040 self.changed_primary_ip = False
6042 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6043 if self.op.readd and node == existing_node_name:
6044 if existing_node.secondary_ip != secondary_ip:
6045 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6046 " address configuration as before",
6048 if existing_node.primary_ip != primary_ip:
6049 self.changed_primary_ip = True
6053 if (existing_node.primary_ip == primary_ip or
6054 existing_node.secondary_ip == primary_ip or
6055 existing_node.primary_ip == secondary_ip or
6056 existing_node.secondary_ip == secondary_ip):
6057 raise errors.OpPrereqError("New node ip address(es) conflict with"
6058 " existing node %s" % existing_node.name,
6059 errors.ECODE_NOTUNIQUE)
6061 # After this 'if' block, None is no longer a valid value for the
6062 # _capable op attributes
6064 old_node = self.cfg.GetNodeInfo(node)
6065 assert old_node is not None, "Can't retrieve locked node %s" % node
6066 for attr in self._NFLAGS:
6067 if getattr(self.op, attr) is None:
6068 setattr(self.op, attr, getattr(old_node, attr))
6070 for attr in self._NFLAGS:
6071 if getattr(self.op, attr) is None:
6072 setattr(self.op, attr, True)
6074 if self.op.readd and not self.op.vm_capable:
6075 pri, sec = cfg.GetNodeInstances(node)
6077 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6078 " flag set to false, but it already holds"
6079 " instances" % node,
6082 # check that the type of the node (single versus dual homed) is the
6083 # same as for the master
6084 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6085 master_singlehomed = myself.secondary_ip == myself.primary_ip
6086 newbie_singlehomed = secondary_ip == primary_ip
6087 if master_singlehomed != newbie_singlehomed:
6088 if master_singlehomed:
6089 raise errors.OpPrereqError("The master has no secondary ip but the"
6090 " new node has one",
6093 raise errors.OpPrereqError("The master has a secondary ip but the"
6094 " new node doesn't have one",
6097 # checks reachability
6098 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6099 raise errors.OpPrereqError("Node not reachable by ping",
6100 errors.ECODE_ENVIRON)
6102 if not newbie_singlehomed:
6103 # check reachability from my secondary ip to newbie's secondary ip
6104 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6105 source=myself.secondary_ip):
6106 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6107 " based ping to node daemon port",
6108 errors.ECODE_ENVIRON)
6115 if self.op.master_capable:
6116 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6118 self.master_candidate = False
6121 self.new_node = old_node
6123 node_group = cfg.LookupNodeGroup(self.op.group)
6124 self.new_node = objects.Node(name=node,
6125 primary_ip=primary_ip,
6126 secondary_ip=secondary_ip,
6127 master_candidate=self.master_candidate,
6128 offline=False, drained=False,
6129 group=node_group, ndparams={})
6131 if self.op.ndparams:
6132 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6133 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6134 "node", "cluster or group")
6136 if self.op.hv_state:
6137 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6139 if self.op.disk_state:
6140 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6142 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6143 # it a property on the base class.
6144 rpcrunner = rpc.DnsOnlyRunner()
6145 result = rpcrunner.call_version([node])[node]
6146 result.Raise("Can't get version information from node %s" % node)
6147 if constants.PROTOCOL_VERSION == result.payload:
6148 logging.info("Communication to node %s fine, sw version %s match",
6149 node, result.payload)
6151 raise errors.OpPrereqError("Version mismatch master version %s,"
6152 " node version %s" %
6153 (constants.PROTOCOL_VERSION, result.payload),
6154 errors.ECODE_ENVIRON)
6156 vg_name = cfg.GetVGName()
6157 if vg_name is not None:
6158 vparams = {constants.NV_PVLIST: [vg_name]}
6159 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6160 cname = self.cfg.GetClusterName()
6161 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6162 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6164 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6165 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6167 def Exec(self, feedback_fn):
6168 """Adds the new node to the cluster.
6171 new_node = self.new_node
6172 node = new_node.name
6174 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6177 # We are adding a new node, so we assume it's powered
6178 new_node.powered = True
6180 # for re-adds, reset the offline/drained/master-candidate flags;
6181 # we need to reset here, otherwise offline would prevent RPC calls
6182 # later in the procedure; this also means that if the re-add
6183 # fails, we are left with a non-offlined, broken node
6185 new_node.drained = new_node.offline = False # pylint: disable=W0201
6186 self.LogInfo("Readding a node, the offline/drained flags were reset")
6187 # if we demote the node, we do cleanup later in the procedure
6188 new_node.master_candidate = self.master_candidate
6189 if self.changed_primary_ip:
6190 new_node.primary_ip = self.op.primary_ip
6192 # copy the master/vm_capable flags
6193 for attr in self._NFLAGS:
6194 setattr(new_node, attr, getattr(self.op, attr))
6196 # notify the user about any possible mc promotion
6197 if new_node.master_candidate:
6198 self.LogInfo("Node will be a master candidate")
6200 if self.op.ndparams:
6201 new_node.ndparams = self.op.ndparams
6203 new_node.ndparams = {}
6205 if self.op.hv_state:
6206 new_node.hv_state_static = self.new_hv_state
6208 if self.op.disk_state:
6209 new_node.disk_state_static = self.new_disk_state
6211 # Add node to our /etc/hosts, and add key to known_hosts
6212 if self.cfg.GetClusterInfo().modify_etc_hosts:
6213 master_node = self.cfg.GetMasterNode()
6214 result = self.rpc.call_etc_hosts_modify(master_node,
6215 constants.ETC_HOSTS_ADD,
6218 result.Raise("Can't update hosts file with new host data")
6220 if new_node.secondary_ip != new_node.primary_ip:
6221 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6224 node_verify_list = [self.cfg.GetMasterNode()]
6225 node_verify_param = {
6226 constants.NV_NODELIST: ([node], {}),
6227 # TODO: do a node-net-test as well?
6230 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6231 self.cfg.GetClusterName())
6232 for verifier in node_verify_list:
6233 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6234 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6236 for failed in nl_payload:
6237 feedback_fn("ssh/hostname verification failed"
6238 " (checking from %s): %s" %
6239 (verifier, nl_payload[failed]))
6240 raise errors.OpExecError("ssh/hostname verification failed")
6243 _RedistributeAncillaryFiles(self)
6244 self.context.ReaddNode(new_node)
6245 # make sure we redistribute the config
6246 self.cfg.Update(new_node, feedback_fn)
6247 # and make sure the new node will not have old files around
6248 if not new_node.master_candidate:
6249 result = self.rpc.call_node_demote_from_mc(new_node.name)
6250 msg = result.fail_msg
6252 self.LogWarning("Node failed to demote itself from master"
6253 " candidate status: %s" % msg)
6255 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6256 additional_vm=self.op.vm_capable)
6257 self.context.AddNode(new_node, self.proc.GetECId())
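# Illustrative sketch (not part of the original module): the pre/post hook
# node lists computed by LUNodeAdd.BuildHooksNodes above, using plain
# sequences; "node_list" and "new_node" are hypothetical inputs.
def _ExampleNodeAddHookNodes(node_list, new_node):
  """Return (pre_nodes, post_nodes) for a node-add operation (sketch)."""
  # pre-hooks run everywhere except on the node being added; post-hooks
  # additionally include the new node
  pre_nodes = list(set(node_list) - set([new_node]))
  post_nodes = pre_nodes + [new_node]
  return (pre_nodes, post_nodes)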
6260 class LUNodeSetParams(LogicalUnit):
6261 """Modifies the parameters of a node.
6263 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6264 to the node role (as _ROLE_*)
6265 @cvar _R2F: a dictionary from node role to tuples of flags
6266 @cvar _FLAGS: a list of attribute names corresponding to the flags
6269 HPATH = "node-modify"
6270 HTYPE = constants.HTYPE_NODE
6272 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6274 (True, False, False): _ROLE_CANDIDATE,
6275 (False, True, False): _ROLE_DRAINED,
6276 (False, False, True): _ROLE_OFFLINE,
6277 (False, False, False): _ROLE_REGULAR,
6279 _R2F = dict((v, k) for k, v in _F2R.items())
6280 _FLAGS = ["master_candidate", "drained", "offline"]
6282 def CheckArguments(self):
6283 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6284 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6285 self.op.master_capable, self.op.vm_capable,
6286 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6288 if all_mods.count(None) == len(all_mods):
6289 raise errors.OpPrereqError("Please pass at least one modification",
6291 if all_mods.count(True) > 1:
6292 raise errors.OpPrereqError("Can't set the node into more than one"
6293 " state at the same time",
6296 # Boolean value that tells us whether we might be demoting from MC
6297 self.might_demote = (self.op.master_candidate is False or
6298 self.op.offline is True or
6299 self.op.drained is True or
6300 self.op.master_capable is False)
6302 if self.op.secondary_ip:
6303 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6304 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6305 " address" % self.op.secondary_ip,
6308 self.lock_all = self.op.auto_promote and self.might_demote
6309 self.lock_instances = self.op.secondary_ip is not None
6311 def _InstanceFilter(self, instance):
6312 """Filter for getting affected instances.
6315 return (instance.disk_template in constants.DTS_INT_MIRROR and
6316 self.op.node_name in instance.all_nodes)
6318 def ExpandNames(self):
6320 self.needed_locks = {
6321 locking.LEVEL_NODE: locking.ALL_SET,
6323 # Block allocations when all nodes are locked
6324 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6327 self.needed_locks = {
6328 locking.LEVEL_NODE: self.op.node_name,
6331 # Since modifying a node can have severe effects on currently running
6332 # operations, the resource lock is at least acquired in shared mode
6333 self.needed_locks[locking.LEVEL_NODE_RES] = \
6334 self.needed_locks[locking.LEVEL_NODE]
6336 # Get all locks except nodes in shared mode; they are not used for anything
6337 # but read-only access
6338 self.share_locks = _ShareAll()
6339 self.share_locks[locking.LEVEL_NODE] = 0
6340 self.share_locks[locking.LEVEL_NODE_RES] = 0
6341 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6343 if self.lock_instances:
6344 self.needed_locks[locking.LEVEL_INSTANCE] = \
6345 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6347 def BuildHooksEnv(self):
6350 This runs on the master node.
6354 "OP_TARGET": self.op.node_name,
6355 "MASTER_CANDIDATE": str(self.op.master_candidate),
6356 "OFFLINE": str(self.op.offline),
6357 "DRAINED": str(self.op.drained),
6358 "MASTER_CAPABLE": str(self.op.master_capable),
6359 "VM_CAPABLE": str(self.op.vm_capable),
6362 def BuildHooksNodes(self):
6363 """Build hooks nodes.
6366 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6369 def CheckPrereq(self):
6370 """Check prerequisites.
6372 This only checks the instance list against the existing names.
6375 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6377 if self.lock_instances:
6378 affected_instances = \
6379 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6381 # Verify instance locks
6382 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6383 wanted_instances = frozenset(affected_instances.keys())
6384 if wanted_instances - owned_instances:
6385 raise errors.OpPrereqError("Instances affected by changing node %s's"
6386 " secondary IP address have changed since"
6387 " locks were acquired, wanted '%s', have"
6388 " '%s'; retry the operation" %
6390 utils.CommaJoin(wanted_instances),
6391 utils.CommaJoin(owned_instances)),
6394 affected_instances = None
6396 if (self.op.master_candidate is not None or
6397 self.op.drained is not None or
6398 self.op.offline is not None):
6399 # we can't change the master's node flags
6400 if self.op.node_name == self.cfg.GetMasterNode():
6401 raise errors.OpPrereqError("The master role can be changed"
6402 " only via master-failover",
6405 if self.op.master_candidate and not node.master_capable:
6406 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6407 " it a master candidate" % node.name,
6410 if self.op.vm_capable is False:
6411 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6413 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6414 " the vm_capable flag" % node.name,
6417 if node.master_candidate and self.might_demote and not self.lock_all:
6418 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6419 # check if after removing the current node, we're missing master candidates
6421 (mc_remaining, mc_should, _) = \
6422 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6423 if mc_remaining < mc_should:
6424 raise errors.OpPrereqError("Not enough master candidates, please"
6425 " pass auto promote option to allow"
6426 " promotion (--auto-promote or RAPI"
6427 " auto_promote=True)", errors.ECODE_STATE)
6429 self.old_flags = old_flags = (node.master_candidate,
6430 node.drained, node.offline)
6431 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6432 self.old_role = old_role = self._F2R[old_flags]
6434 # Check for ineffective changes
6435 for attr in self._FLAGS:
6436 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6437 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6438 setattr(self.op, attr, None)
6440 # Past this point, any flag change to False means a transition
6441 # away from the respective state, as only real changes are kept
6443 # TODO: We might query the real power state if it supports OOB
6444 if _SupportsOob(self.cfg, node):
6445 if self.op.offline is False and not (node.powered or
6446 self.op.powered is True):
6447 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6448 " offline status can be reset") %
6449 self.op.node_name, errors.ECODE_STATE)
6450 elif self.op.powered is not None:
6451 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6452 " as it does not support out-of-band"
6453 " handling") % self.op.node_name,
6456 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
6457 if (self.op.drained is False or self.op.offline is False or
6458 (self.op.master_capable and not node.master_capable)):
6459 if _DecideSelfPromotion(self):
6460 self.op.master_candidate = True
6461 self.LogInfo("Auto-promoting node to master candidate")
6463 # If we're no longer master capable, we'll demote ourselves from MC
6464 if self.op.master_capable is False and node.master_candidate:
6465 self.LogInfo("Demoting from master candidate")
6466 self.op.master_candidate = False
6469 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6470 if self.op.master_candidate:
6471 new_role = self._ROLE_CANDIDATE
6472 elif self.op.drained:
6473 new_role = self._ROLE_DRAINED
6474 elif self.op.offline:
6475 new_role = self._ROLE_OFFLINE
6476 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6477 # False is still in new flags, which means we're un-setting (the current) state
6479 new_role = self._ROLE_REGULAR
6480 else: # no new flags, nothing, keep old role
6483 self.new_role = new_role
6485 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6486 # Trying to transition out of offline status
6487 result = self.rpc.call_version([node.name])[node.name]
6489 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6490 " to report its version: %s" %
6491 (node.name, result.fail_msg),
6494 self.LogWarning("Transitioning node from offline to online state"
6495 " without using re-add. Please make sure the node"
6498 # When changing the secondary ip, verify if this is a single-homed to
6499 # multi-homed transition or vice versa, and apply the relevant checks
6501 if self.op.secondary_ip:
6502 # Ok even without locking, because this can't be changed by any LU
6503 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6504 master_singlehomed = master.secondary_ip == master.primary_ip
6505 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6506 if self.op.force and node.name == master.name:
6507 self.LogWarning("Transitioning from single-homed to multi-homed"
6508 " cluster; all nodes will require a secondary IP"
6511 raise errors.OpPrereqError("Changing the secondary ip on a"
6512 " single-homed cluster requires the"
6513 " --force option to be passed, and the"
6514 " target node to be the master",
6516 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6517 if self.op.force and node.name == master.name:
6518 self.LogWarning("Transitioning from multi-homed to single-homed"
6519 " cluster; secondary IP addresses will have to be"
6522 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6523 " same as the primary IP on a multi-homed"
6524 " cluster, unless the --force option is"
6525 " passed, and the target node is the"
6526 " master", errors.ECODE_INVAL)
6528 assert not (frozenset(affected_instances) -
6529 self.owned_locks(locking.LEVEL_INSTANCE))
6532 if affected_instances:
6533 msg = ("Cannot change secondary IP address: offline node has"
6534 " instances (%s) configured to use it" %
6535 utils.CommaJoin(affected_instances.keys()))
6536 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6538 # On online nodes, check that no instances are running, and that
6539 # the node has the new ip and we can reach it.
6540 for instance in affected_instances.values():
6541 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6542 msg="cannot change secondary ip")
6544 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6545 if master.name != node.name:
6546 # check reachability from master secondary ip to new secondary ip
6547 if not netutils.TcpPing(self.op.secondary_ip,
6548 constants.DEFAULT_NODED_PORT,
6549 source=master.secondary_ip):
6550 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6551 " based ping to node daemon port",
6552 errors.ECODE_ENVIRON)
6554 if self.op.ndparams:
6555 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6556 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6557 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6558 "node", "cluster or group")
6559 self.new_ndparams = new_ndparams
6561 if self.op.hv_state:
6562 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6563 self.node.hv_state_static)
6565 if self.op.disk_state:
6566 self.new_disk_state = \
6567 _MergeAndVerifyDiskState(self.op.disk_state,
6568 self.node.disk_state_static)
6570 def Exec(self, feedback_fn):
6575 old_role = self.old_role
6576 new_role = self.new_role
6580 if self.op.ndparams:
6581 node.ndparams = self.new_ndparams
6583 if self.op.powered is not None:
6584 node.powered = self.op.powered
6586 if self.op.hv_state:
6587 node.hv_state_static = self.new_hv_state
6589 if self.op.disk_state:
6590 node.disk_state_static = self.new_disk_state
6592 for attr in ["master_capable", "vm_capable"]:
6593 val = getattr(self.op, attr)
6595 setattr(node, attr, val)
6596 result.append((attr, str(val)))
6598 if new_role != old_role:
6599 # Tell the node to demote itself, if no longer MC and not offline
6600 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6601 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6603 self.LogWarning("Node failed to demote itself: %s", msg)
6605 new_flags = self._R2F[new_role]
6606 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6608 result.append((desc, str(nf)))
6609 (node.master_candidate, node.drained, node.offline) = new_flags
6611 # we locked all nodes, so we adjust the candidate pool before updating this node
6613 _AdjustCandidatePool(self, [node.name])
6615 if self.op.secondary_ip:
6616 node.secondary_ip = self.op.secondary_ip
6617 result.append(("secondary_ip", self.op.secondary_ip))
6619 # this will trigger configuration file update, if needed
6620 self.cfg.Update(node, feedback_fn)
6622 # this will trigger job queue propagation or cleanup if the mc flag changed
6624 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6625 self.context.ReaddNode(node)
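# Illustrative sketch (not part of the original module): the flag-tuple to
# role mapping used by LUNodeSetParams (_F2R/_R2F) and how a role change is
# turned back into per-flag updates. The role strings are local stand-ins
# for the _ROLE_* constants.
def _ExampleApplyRole(old_flags, new_role):
  """Map a new role back to its flag tuple and report changes (sketch).

  @param old_flags: (master_candidate, drained, offline) booleans
  @param new_role: one of "candidate", "drained", "offline", "regular"

  """
  role_to_flags = {
    "candidate": (True, False, False),
    "drained": (False, True, False),
    "offline": (False, False, True),
    "regular": (False, False, False),
    }
  flag_names = ["master_candidate", "drained", "offline"]
  new_flags = role_to_flags[new_role]
  # record only the flags that actually change
  changes = [(name, nf)
             for (of, nf, name) in zip(old_flags, new_flags, flag_names)
             if of != nf]
  return (new_flags, changes)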
6630 class LUNodePowercycle(NoHooksLU):
6631 """Powercycles a node.
6636 def CheckArguments(self):
6637 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6638 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6639 raise errors.OpPrereqError("The node is the master and the force"
6640 " parameter was not set",
6643 def ExpandNames(self):
6644 """Locking for PowercycleNode.
6646 This is a last-resort option and shouldn't block on other
6647 jobs. Therefore, we grab no locks.
6650 self.needed_locks = {}
6652 def Exec(self, feedback_fn):
6656 result = self.rpc.call_node_powercycle(self.op.node_name,
6657 self.cfg.GetHypervisorType())
6658 result.Raise("Failed to schedule the reboot")
6659 return result.payload
6662 class LUClusterQuery(NoHooksLU):
6663 """Query cluster configuration.
6668 def ExpandNames(self):
6669 self.needed_locks = {}
6671 def Exec(self, feedback_fn):
6672 """Return cluster config.
6675 cluster = self.cfg.GetClusterInfo()
6678 # Filter just for enabled hypervisors
6679 for os_name, hv_dict in cluster.os_hvp.items():
6680 os_hvp[os_name] = {}
6681 for hv_name, hv_params in hv_dict.items():
6682 if hv_name in cluster.enabled_hypervisors:
6683 os_hvp[os_name][hv_name] = hv_params
6685 # Convert ip_family to ip_version
6686 primary_ip_version = constants.IP4_VERSION
6687 if cluster.primary_ip_family == netutils.IP6Address.family:
6688 primary_ip_version = constants.IP6_VERSION
6691 "software_version": constants.RELEASE_VERSION,
6692 "protocol_version": constants.PROTOCOL_VERSION,
6693 "config_version": constants.CONFIG_VERSION,
6694 "os_api_version": max(constants.OS_API_VERSIONS),
6695 "export_version": constants.EXPORT_VERSION,
6696 "architecture": runtime.GetArchInfo(),
6697 "name": cluster.cluster_name,
6698 "master": cluster.master_node,
6699 "default_hypervisor": cluster.primary_hypervisor,
6700 "enabled_hypervisors": cluster.enabled_hypervisors,
6701 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6702 for hypervisor_name in cluster.enabled_hypervisors]),
6704 "beparams": cluster.beparams,
6705 "osparams": cluster.osparams,
6706 "ipolicy": cluster.ipolicy,
6707 "nicparams": cluster.nicparams,
6708 "ndparams": cluster.ndparams,
6709 "diskparams": cluster.diskparams,
6710 "candidate_pool_size": cluster.candidate_pool_size,
6711 "master_netdev": cluster.master_netdev,
6712 "master_netmask": cluster.master_netmask,
6713 "use_external_mip_script": cluster.use_external_mip_script,
6714 "volume_group_name": cluster.volume_group_name,
6715 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6716 "file_storage_dir": cluster.file_storage_dir,
6717 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6718 "maintain_node_health": cluster.maintain_node_health,
6719 "ctime": cluster.ctime,
6720 "mtime": cluster.mtime,
6721 "uuid": cluster.uuid,
6722 "tags": list(cluster.GetTags()),
6723 "uid_pool": cluster.uid_pool,
6724 "default_iallocator": cluster.default_iallocator,
6725 "reserved_lvs": cluster.reserved_lvs,
6726 "primary_ip_version": primary_ip_version,
6727 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6728 "hidden_os": cluster.hidden_os,
6729 "blacklisted_os": cluster.blacklisted_os,
6735 class LUClusterConfigQuery(NoHooksLU):
6736 """Return configuration values.
6741 def CheckArguments(self):
6742 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6744 def ExpandNames(self):
6745 self.cq.ExpandNames(self)
6747 def DeclareLocks(self, level):
6748 self.cq.DeclareLocks(self, level)
6750 def Exec(self, feedback_fn):
6751 result = self.cq.OldStyleQuery(self)
6753 assert len(result) == 1
6758 class _ClusterQuery(_QueryBase):
6759 FIELDS = query.CLUSTER_FIELDS
6761 #: Do not sort (there is only one item)
6764 def ExpandNames(self, lu):
6765 lu.needed_locks = {}
6767 # The following variables interact with _QueryBase._GetNames
6768 self.wanted = locking.ALL_SET
6769 self.do_locking = self.use_locking
6772 raise errors.OpPrereqError("Can not use locking for cluster queries",
6775 def DeclareLocks(self, lu, level):
6778 def _GetQueryData(self, lu):
6779 """Computes the list of nodes and their attributes.
6782 # Locking is not used
6783 assert not (compat.any(lu.glm.is_owned(level)
6784 for level in locking.LEVELS
6785 if level != locking.LEVEL_CLUSTER) or
6786 self.do_locking or self.use_locking)
6788 if query.CQ_CONFIG in self.requested_data:
6789 cluster = lu.cfg.GetClusterInfo()
6791 cluster = NotImplemented
6793 if query.CQ_QUEUE_DRAINED in self.requested_data:
6794 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6796 drain_flag = NotImplemented
6798 if query.CQ_WATCHER_PAUSE in self.requested_data:
6799 master_name = lu.cfg.GetMasterNode()
6801 result = lu.rpc.call_get_watcher_pause(master_name)
6802 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6805 watcher_pause = result.payload
6807 watcher_pause = NotImplemented
6809 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
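# Illustrative sketch (not part of the original module): the gathering
# pattern used by the *_Query classes above, where each piece of data is
# computed only when its key appears in requested_data and is otherwise left
# as NotImplemented. "collectors" and its keys are hypothetical.
def _ExampleGatherRequested(requested_data, collectors):
  """Run only the collectors whose key was requested (sketch).

  @param requested_data: set of requested data keys
  @param collectors: dict mapping data key to a no-argument callable

  """
  return dict((key, fn() if key in requested_data else NotImplemented)
              for (key, fn) in collectors.items())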
6812 class LUInstanceActivateDisks(NoHooksLU):
6813 """Bring up an instance's disks.
6818 def ExpandNames(self):
6819 self._ExpandAndLockInstance()
6820 self.needed_locks[locking.LEVEL_NODE] = []
6821 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6823 def DeclareLocks(self, level):
6824 if level == locking.LEVEL_NODE:
6825 self._LockInstancesNodes()
6827 def CheckPrereq(self):
6828 """Check prerequisites.
6830 This checks that the instance is in the cluster.
6833 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6834 assert self.instance is not None, \
6835 "Cannot retrieve locked instance %s" % self.op.instance_name
6836 _CheckNodeOnline(self, self.instance.primary_node)
6838 def Exec(self, feedback_fn):
6839 """Activate the disks.
6842 disks_ok, disks_info = \
6843 _AssembleInstanceDisks(self, self.instance,
6844 ignore_size=self.op.ignore_size)
6846 raise errors.OpExecError("Cannot activate block devices")
6848 if self.op.wait_for_sync:
6849 if not _WaitForSync(self, self.instance):
6850 raise errors.OpExecError("Some disks of the instance are degraded!")
6855 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6857 """Prepare the block devices for an instance.
6859 This sets up the block devices on all nodes.
6861 @type lu: L{LogicalUnit}
6862 @param lu: the logical unit on whose behalf we execute
6863 @type instance: L{objects.Instance}
6864 @param instance: the instance for whose disks we assemble
6865 @type disks: list of L{objects.Disk} or None
6866 @param disks: which disks to assemble (or all, if None)
6867 @type ignore_secondaries: boolean
6868 @param ignore_secondaries: if true, errors on secondary nodes
6869 won't result in an error return from the function
6870 @type ignore_size: boolean
6871 @param ignore_size: if true, the current known size of the disk
6872 will not be used during the disk activation, useful for cases
6873 when the size is wrong
6874 @return: False if the operation failed, otherwise a list of
6875 (host, instance_visible_name, node_visible_name)
6876 with the mapping from node devices to instance devices
6881 iname = instance.name
6882 disks = _ExpandCheckDisks(instance, disks)
6884 # With the two-pass mechanism we try to reduce the window of
6885 # opportunity for the race condition of switching DRBD to primary
6886 # before handshaking occurred, but we do not eliminate it
6888 # The proper fix would be to wait (with some limits) until the
6889 # connection has been made and drbd transitions from WFConnection
6890 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6893 # 1st pass, assemble on all nodes in secondary mode
6894 for idx, inst_disk in enumerate(disks):
6895 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6897 node_disk = node_disk.Copy()
6898 node_disk.UnsetSize()
6899 lu.cfg.SetDiskID(node_disk, node)
6900 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6902 msg = result.fail_msg
6904 is_offline_secondary = (node in instance.secondary_nodes and
6906 lu.LogWarning("Could not prepare block device %s on node %s"
6907 " (is_primary=False, pass=1): %s",
6908 inst_disk.iv_name, node, msg)
6909 if not (ignore_secondaries or is_offline_secondary):
6912 # FIXME: race condition on drbd migration to primary
6914 # 2nd pass, do only the primary node
6915 for idx, inst_disk in enumerate(disks):
6918 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6919 if node != instance.primary_node:
6922 node_disk = node_disk.Copy()
6923 node_disk.UnsetSize()
6924 lu.cfg.SetDiskID(node_disk, node)
6925 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6927 msg = result.fail_msg
6929 lu.LogWarning("Could not prepare block device %s on node %s"
6930 " (is_primary=True, pass=2): %s",
6931 inst_disk.iv_name, node, msg)
6934 dev_path = result.payload
6936 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6938 # leave the disks configured for the primary node
6939 # this is a workaround that would be fixed better by
6940 # improving the logical/physical id handling
6942 lu.cfg.SetDiskID(disk, instance.primary_node)
6944 return disks_ok, device_info
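# Illustrative sketch (not part of the original module): the two-pass order
# used by _AssembleInstanceDisks above; first every node brings the device
# up in secondary mode so DRBD peers can handshake, then only the primary
# node switches it to primary. "assemble_fn" is a hypothetical callback
# standing in for the call_blockdev_assemble RPC.
def _ExampleTwoPassAssemble(primary_node, all_nodes, disks, assemble_fn):
  """Assemble disks in two passes (sketch only)."""
  # 1st pass: secondary mode everywhere
  for disk in disks:
    for node in all_nodes:
      assemble_fn(node, disk, as_primary=False)
  # 2nd pass: promote only on the primary node
  for disk in disks:
    assemble_fn(primary_node, disk, as_primary=True)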
6947 def _StartInstanceDisks(lu, instance, force):
6948 """Start the disks of an instance.
6951 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6952 ignore_secondaries=force)
6954 _ShutdownInstanceDisks(lu, instance)
6955 if force is not None and not force:
6957 hint=("If the message above refers to a secondary node,"
6958 " you can retry the operation using '--force'"))
6959 raise errors.OpExecError("Disk consistency error")
6962 class LUInstanceDeactivateDisks(NoHooksLU):
6963 """Shutdown an instance's disks.
6968 def ExpandNames(self):
6969 self._ExpandAndLockInstance()
6970 self.needed_locks[locking.LEVEL_NODE] = []
6971 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6973 def DeclareLocks(self, level):
6974 if level == locking.LEVEL_NODE:
6975 self._LockInstancesNodes()
6977 def CheckPrereq(self):
6978 """Check prerequisites.
6980 This checks that the instance is in the cluster.
6983 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6984 assert self.instance is not None, \
6985 "Cannot retrieve locked instance %s" % self.op.instance_name
6987 def Exec(self, feedback_fn):
6988 """Deactivate the disks
6991 instance = self.instance
6993 _ShutdownInstanceDisks(self, instance)
6995 _SafeShutdownInstanceDisks(self, instance)
6998 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6999 """Shutdown block devices of an instance.
7001 This function checks if an instance is running, before calling
7002 _ShutdownInstanceDisks.
7005 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7006 _ShutdownInstanceDisks(lu, instance, disks=disks)
7009 def _ExpandCheckDisks(instance, disks):
7010 """Return the instance disks selected by the disks list
7012 @type disks: list of L{objects.Disk} or None
7013 @param disks: selected disks
7014 @rtype: list of L{objects.Disk}
7015 @return: selected instance disks to act on
7019 return instance.disks
7021 if not set(disks).issubset(instance.disks):
7022 raise errors.ProgrammerError("Can only act on disks belonging to the"
7027 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7028 """Shutdown block devices of an instance.
7030 This does the shutdown on all nodes of the instance.
7032 If ignore_primary is false, errors on the primary node are ignored.
7037 disks = _ExpandCheckDisks(instance, disks)
7040 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7041 lu.cfg.SetDiskID(top_disk, node)
7042 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7043 msg = result.fail_msg
7045 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7046 disk.iv_name, node, msg)
7047 if ((node == instance.primary_node and not ignore_primary) or
7048 (node != instance.primary_node and not result.offline)):
7053 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7054 """Checks if a node has enough free memory.
7056 This function checks if a given node has the needed amount of free
7057 memory. In case the node has less memory or we cannot get the
7058 information from the node, this function raises an OpPrereqError exception.
7061 @type lu: C{LogicalUnit}
7062 @param lu: a logical unit from which we get configuration data
7064 @param node: the node to check
7065 @type reason: C{str}
7066 @param reason: string to use in the error message
7067 @type requested: C{int}
7068 @param requested: the amount of memory in MiB to check for
7069 @type hypervisor_name: C{str}
7070 @param hypervisor_name: the hypervisor to ask for memory stats
7072 @return: node current free memory
7073 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7074 we cannot check the node
7077 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7078 nodeinfo[node].Raise("Can't get data from node %s" % node,
7079 prereq=True, ecode=errors.ECODE_ENVIRON)
7080 (_, _, (hv_info, )) = nodeinfo[node].payload
7082 free_mem = hv_info.get("memory_free", None)
7083 if not isinstance(free_mem, int):
7084 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7085 " was '%s'" % (node, free_mem),
7086 errors.ECODE_ENVIRON)
7087 if requested > free_mem:
7088 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7089 " needed %s MiB, available %s MiB" %
7090 (node, reason, requested, free_mem),
7095 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7096 """Checks if nodes have enough free disk space in all the VGs.
7098 This function checks if all given nodes have the needed amount of
7099 free disk. In case any node has less disk or we cannot get the
7100 information from the node, this function raises an OpPrereqError exception.
7103 @type lu: C{LogicalUnit}
7104 @param lu: a logical unit from which we get configuration data
7105 @type nodenames: C{list}
7106 @param nodenames: the list of node names to check
7107 @type req_sizes: C{dict}
7108 @param req_sizes: the hash of vg and corresponding amount of disk in
7110 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7111 or we cannot check the node
7114 for vg, req_size in req_sizes.items():
7115 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
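# Illustrative sketch (not part of the original module): checking a
# per-volume-group size requirement against reported free space, in the
# spirit of _CheckNodesFreeDiskPerVG/_CheckNodesFreeDiskOnVG above. The
# dictionaries used here are hypothetical stand-ins for the RPC payloads.
def _ExampleCheckFreeDisk(free_by_node_and_vg, req_sizes):
  """Verify every node has enough free space in every requested VG (sketch).

  @param free_by_node_and_vg: dict of node name to a dict of vg name to
    free space in MiB
  @param req_sizes: dict of vg name to required space in MiB

  """
  for (vg, requested) in req_sizes.items():
    for (node, free_by_vg) in free_by_node_and_vg.items():
      vg_free = free_by_vg.get(vg)
      if not isinstance(vg_free, int):
        raise ValueError("Can't compute free space for vg %s on node %s" %
                         (vg, node))
      if requested > vg_free:
        raise ValueError("Not enough space on node %s, vg %s:"
                         " required %d MiB, available %d MiB" %
                         (node, vg, requested, vg_free))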
7118 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7119 """Checks if nodes have enough free disk space in the specified VG.
7121 This function checks if all given nodes have the needed amount of
7122 free disk. In case any node has less disk or we cannot get the
7123 information from the node, this function raises an OpPrereqError exception.
7126 @type lu: C{LogicalUnit}
7127 @param lu: a logical unit from which we get configuration data
7128 @type nodenames: C{list}
7129 @param nodenames: the list of node names to check
7131 @param vg: the volume group to check
7132 @type requested: C{int}
7133 @param requested: the amount of disk in MiB to check for
7134 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7135 or we cannot check the node
7138 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7139 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7140 for node in nodenames:
7141 info = nodeinfo[node]
7142 info.Raise("Cannot get current information from node %s" % node,
7143 prereq=True, ecode=errors.ECODE_ENVIRON)
7144 (_, (vg_info, ), _) = info.payload
7145 vg_free = vg_info.get("vg_free", None)
7146 if not isinstance(vg_free, int):
7147 raise errors.OpPrereqError("Can't compute free disk space on node"
7148 " %s for vg %s, result was '%s'" %
7149 (node, vg, vg_free), errors.ECODE_ENVIRON)
7150 if requested > vg_free:
7151 raise errors.OpPrereqError("Not enough disk space on target node %s"
7152 " vg %s: required %d MiB, available %d MiB" %
7153 (node, vg, requested, vg_free),
7157 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7158 """Checks if nodes have enough physical CPUs
7160 This function checks if all given nodes have the needed number of
7161 physical CPUs. In case any node has less CPUs or we cannot get the
7162 information from the node, this function raises an OpPrereqError exception.
7165 @type lu: C{LogicalUnit}
7166 @param lu: a logical unit from which we get configuration data
7167 @type nodenames: C{list}
7168 @param nodenames: the list of node names to check
7169 @type requested: C{int}
7170 @param requested: the minimum acceptable number of physical CPUs
7171 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7172 or we cannot check the node
7175 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7176 for node in nodenames:
7177 info = nodeinfo[node]
7178 info.Raise("Cannot get current information from node %s" % node,
7179 prereq=True, ecode=errors.ECODE_ENVIRON)
7180 (_, _, (hv_info, )) = info.payload
7181 num_cpus = hv_info.get("cpu_total", None)
7182 if not isinstance(num_cpus, int):
7183 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7184 " on node %s, result was '%s'" %
7185 (node, num_cpus), errors.ECODE_ENVIRON)
7186 if requested > num_cpus:
7187 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7188 "required" % (node, num_cpus, requested),
7192 class LUInstanceStartup(LogicalUnit):
7193 """Starts an instance.
7196 HPATH = "instance-start"
7197 HTYPE = constants.HTYPE_INSTANCE
7200 def CheckArguments(self):
7202 if self.op.beparams:
7203 # fill the beparams dict
7204 objects.UpgradeBeParams(self.op.beparams)
7205 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7207 def ExpandNames(self):
7208 self._ExpandAndLockInstance()
7209 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7211 def DeclareLocks(self, level):
7212 if level == locking.LEVEL_NODE_RES:
7213 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7215 def BuildHooksEnv(self):
7218 This runs on master, primary and secondary nodes of the instance.
7222 "FORCE": self.op.force,
7225 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7229 def BuildHooksNodes(self):
7230 """Build hooks nodes.
7233 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7236 def CheckPrereq(self):
7237 """Check prerequisites.
7239 This checks that the instance is in the cluster.
7242 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7243 assert self.instance is not None, \
7244 "Cannot retrieve locked instance %s" % self.op.instance_name
7247 if self.op.hvparams:
7248 # check hypervisor parameter syntax (locally)
7249 cluster = self.cfg.GetClusterInfo()
7250 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7251 filled_hvp = cluster.FillHV(instance)
7252 filled_hvp.update(self.op.hvparams)
7253 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7254 hv_type.CheckParameterSyntax(filled_hvp)
7255 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7257 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7259 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7261 if self.primary_offline and self.op.ignore_offline_nodes:
7262 self.LogWarning("Ignoring offline primary node")
7264 if self.op.hvparams or self.op.beparams:
7265 self.LogWarning("Overridden parameters are ignored")
7267 _CheckNodeOnline(self, instance.primary_node)
7269 bep = self.cfg.GetClusterInfo().FillBE(instance)
7270 bep.update(self.op.beparams)
7272 # check bridges existence
7273 _CheckInstanceBridgesExist(self, instance)
7275 remote_info = self.rpc.call_instance_info(instance.primary_node,
7277 instance.hypervisor)
7278 remote_info.Raise("Error checking node %s" % instance.primary_node,
7279 prereq=True, ecode=errors.ECODE_ENVIRON)
7280 if not remote_info.payload: # not running already
7281 _CheckNodeFreeMemory(self, instance.primary_node,
7282 "starting instance %s" % instance.name,
7283 bep[constants.BE_MINMEM], instance.hypervisor)
7285 def Exec(self, feedback_fn):
7286 """Start the instance.
7289 instance = self.instance
7290 force = self.op.force
7292 if not self.op.no_remember:
7293 self.cfg.MarkInstanceUp(instance.name)
7295 if self.primary_offline:
7296 assert self.op.ignore_offline_nodes
7297 self.LogInfo("Primary node offline, marked instance as started")
7299 node_current = instance.primary_node
7301 _StartInstanceDisks(self, instance, force)
7304 self.rpc.call_instance_start(node_current,
7305 (instance, self.op.hvparams,
7307 self.op.startup_paused)
7308 msg = result.fail_msg
7310 _ShutdownInstanceDisks(self, instance)
7311 raise errors.OpExecError("Could not start instance: %s" % msg)
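# Illustrative sketch (not part of the original module): the start-up flow
# of LUInstanceStartup.Exec above, reduced to its control flow; the
# callbacks are hypothetical stand-ins for the configuration update, disk
# assembly and RPC calls, and "start_fn" is assumed to return an error
# message or None.
def _ExampleStartInstance(mark_up_fn, start_disks_fn, start_fn,
                          shutdown_disks_fn):
  """Start an instance, rolling back disks if the start fails (sketch)."""
  mark_up_fn()           # record the intended state first
  start_disks_fn()       # bring up the block devices
  error = start_fn()     # ask the node to start the instance
  if error:
    shutdown_disks_fn()  # don't leave disks active on failure
    raise RuntimeError("Could not start instance: %s" % error)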
7314 class LUInstanceReboot(LogicalUnit):
7315 """Reboot an instance.
7318 HPATH = "instance-reboot"
7319 HTYPE = constants.HTYPE_INSTANCE
7322 def ExpandNames(self):
7323 self._ExpandAndLockInstance()
7325 def BuildHooksEnv(self):
7328 This runs on master, primary and secondary nodes of the instance.
7332 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7333 "REBOOT_TYPE": self.op.reboot_type,
7334 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7337 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7341 def BuildHooksNodes(self):
7342 """Build hooks nodes.
7345 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7348 def CheckPrereq(self):
7349 """Check prerequisites.
7351 This checks that the instance is in the cluster.
7354 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7355 assert self.instance is not None, \
7356 "Cannot retrieve locked instance %s" % self.op.instance_name
7357 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7358 _CheckNodeOnline(self, instance.primary_node)
7360 # check bridges existence
7361 _CheckInstanceBridgesExist(self, instance)
7363 def Exec(self, feedback_fn):
7364 """Reboot the instance.
7367 instance = self.instance
7368 ignore_secondaries = self.op.ignore_secondaries
7369 reboot_type = self.op.reboot_type
7371 remote_info = self.rpc.call_instance_info(instance.primary_node,
7373 instance.hypervisor)
7374 remote_info.Raise("Error checking node %s" % instance.primary_node)
7375 instance_running = bool(remote_info.payload)
7377 node_current = instance.primary_node
7379 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7380 constants.INSTANCE_REBOOT_HARD]:
7381 for disk in instance.disks:
7382 self.cfg.SetDiskID(disk, node_current)
7383 result = self.rpc.call_instance_reboot(node_current, instance,
7385 self.op.shutdown_timeout)
7386 result.Raise("Could not reboot instance")
7388 if instance_running:
7389 result = self.rpc.call_instance_shutdown(node_current, instance,
7390 self.op.shutdown_timeout)
7391 result.Raise("Could not shutdown instance for full reboot")
7392 _ShutdownInstanceDisks(self, instance)
7394 self.LogInfo("Instance %s was already stopped, starting now",
7396 _StartInstanceDisks(self, instance, ignore_secondaries)
7397 result = self.rpc.call_instance_start(node_current,
7398 (instance, None, None), False)
7399 msg = result.fail_msg
7401 _ShutdownInstanceDisks(self, instance)
7402 raise errors.OpExecError("Could not start instance for"
7403 " full reboot: %s" % msg)
7405 self.cfg.MarkInstanceUp(instance.name)
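# Illustrative sketch (not part of the original module): the decision made
# by LUInstanceReboot.Exec above. Soft/hard reboots of a running instance
# are delegated to the hypervisor; anything else becomes a full stop/start
# cycle. The reboot-type strings and callbacks here are hypothetical
# stand-ins for the INSTANCE_REBOOT_* constants and RPC calls.
def _ExampleReboot(instance_running, reboot_type, hypervisor_reboot_fn,
                   full_stop_fn, full_start_fn):
  """Choose between a hypervisor reboot and a full restart (sketch)."""
  if instance_running and reboot_type in ("soft", "hard"):
    hypervisor_reboot_fn()
  else:
    if instance_running:
      full_stop_fn()    # shut the instance and its disks down first
    full_start_fn()     # then (re)start the disks and the instance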
7408 class LUInstanceShutdown(LogicalUnit):
7409 """Shutdown an instance.
7412 HPATH = "instance-stop"
7413 HTYPE = constants.HTYPE_INSTANCE
7416 def ExpandNames(self):
7417 self._ExpandAndLockInstance()
7419 def BuildHooksEnv(self):
7422 This runs on master, primary and secondary nodes of the instance.
7425 env = _BuildInstanceHookEnvByObject(self, self.instance)
7426 env["TIMEOUT"] = self.op.timeout
7429 def BuildHooksNodes(self):
7430 """Build hooks nodes.
7433 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7436 def CheckPrereq(self):
7437 """Check prerequisites.
7439 This checks that the instance is in the cluster.
7442 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7443 assert self.instance is not None, \
7444 "Cannot retrieve locked instance %s" % self.op.instance_name
7446 if not self.op.force:
7447 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7449 self.LogWarning("Ignoring offline instance check")
7451 self.primary_offline = \
7452 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7454 if self.primary_offline and self.op.ignore_offline_nodes:
7455 self.LogWarning("Ignoring offline primary node")
7457 _CheckNodeOnline(self, self.instance.primary_node)
7459 def Exec(self, feedback_fn):
7460 """Shutdown the instance.
7463 instance = self.instance
7464 node_current = instance.primary_node
7465 timeout = self.op.timeout
7467 # If the instance is offline we shouldn't mark it as down, as that
7468 # resets the offline flag.
7469 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7470 self.cfg.MarkInstanceDown(instance.name)
7472 if self.primary_offline:
7473 assert self.op.ignore_offline_nodes
7474 self.LogInfo("Primary node offline, marked instance as stopped")
7476 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7477 msg = result.fail_msg
7479 self.LogWarning("Could not shutdown instance: %s", msg)
7481 _ShutdownInstanceDisks(self, instance)
7484 class LUInstanceReinstall(LogicalUnit):
7485 """Reinstall an instance.
7488 HPATH = "instance-reinstall"
7489 HTYPE = constants.HTYPE_INSTANCE
7492 def ExpandNames(self):
7493 self._ExpandAndLockInstance()
7495 def BuildHooksEnv(self):
7498 This runs on master, primary and secondary nodes of the instance.
7501 return _BuildInstanceHookEnvByObject(self, self.instance)
7503 def BuildHooksNodes(self):
7504 """Build hooks nodes.
7507 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7510 def CheckPrereq(self):
7511 """Check prerequisites.
7513 This checks that the instance is in the cluster and is not running.
7516 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7517 assert instance is not None, \
7518 "Cannot retrieve locked instance %s" % self.op.instance_name
7519 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7520 " offline, cannot reinstall")
7522 if instance.disk_template == constants.DT_DISKLESS:
7523 raise errors.OpPrereqError("Instance '%s' has no disks" %
7524 self.op.instance_name,
7526 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7528 if self.op.os_type is not None:
7530 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7531 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7532 instance_os = self.op.os_type
7534 instance_os = instance.os
7536 nodelist = list(instance.all_nodes)
7538 if self.op.osparams:
7539 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7540 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7541 self.os_inst = i_osdict # the new dict (without defaults)
7545 self.instance = instance
7547 def Exec(self, feedback_fn):
7548 """Reinstall the instance.
7551 inst = self.instance
7553 if self.op.os_type is not None:
7554 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7555 inst.os = self.op.os_type
7556 # Write to configuration
7557 self.cfg.Update(inst, feedback_fn)
7559 _StartInstanceDisks(self, inst, None)
7561 feedback_fn("Running the instance OS create scripts...")
7562 # FIXME: pass debug option from opcode to backend
7563 result = self.rpc.call_instance_os_add(inst.primary_node,
7564 (inst, self.os_inst), True,
7565 self.op.debug_level)
7566 result.Raise("Could not install OS for instance %s on node %s" %
7567 (inst.name, inst.primary_node))
7569 _ShutdownInstanceDisks(self, inst)
7572 class LUInstanceRecreateDisks(LogicalUnit):
7573 """Recreate an instance's missing disks.
7576 HPATH = "instance-recreate-disks"
7577 HTYPE = constants.HTYPE_INSTANCE
7580 _MODIFYABLE = compat.UniqueFrozenset([
7581 constants.IDISK_SIZE,
7582 constants.IDISK_MODE,
7585 # New or changed disk parameters may have different semantics
7586 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7587 constants.IDISK_ADOPT,
7589 # TODO: Implement support changing VG while recreating
7591 constants.IDISK_METAVG,
7592 constants.IDISK_PROVIDER,
7595 def _RunAllocator(self):
7596 """Run the allocator based on input opcode.
7599 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7602 # The allocator should actually run in "relocate" mode, but current
7603 # allocators don't support relocating all the nodes of an instance at
7604 # the same time. As a workaround we use "allocate" mode, but this is
7605 # suboptimal for two reasons:
7606 # - The instance name passed to the allocator is present in the list of
7607 # existing instances, so there could be a conflict within the
7608 # internal structures of the allocator. This doesn't happen with the
7609 # current allocators, but it's a liability.
7610 # - The allocator counts the resources used by the instance twice: once
7611 # because the instance exists already, and once because it tries to
7612 # allocate a new instance.
7613 # The allocator could choose some of the nodes on which the instance is
7614 # running, but that's not a problem. If the instance nodes are broken,
7615 #   they should already be marked as drained or offline, and hence
7616 # skipped by the allocator. If instance disks have been lost for other
7617 # reasons, then recreating the disks on the same nodes should be fine.
7618 disk_template = self.instance.disk_template
7619 spindle_use = be_full[constants.BE_SPINDLE_USE]
7620 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7621 disk_template=disk_template,
7622 tags=list(self.instance.GetTags()),
7623 os=self.instance.os,
7625 vcpus=be_full[constants.BE_VCPUS],
7626 memory=be_full[constants.BE_MAXMEM],
7627 spindle_use=spindle_use,
7628 disks=[{constants.IDISK_SIZE: d.size,
7629 constants.IDISK_MODE: d.mode}
7630 for d in self.instance.disks],
7631 hypervisor=self.instance.hypervisor,
7632 node_whitelist=None)
7633 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7635 ial.Run(self.op.iallocator)
7637 assert req.RequiredNodes() == len(self.instance.all_nodes)
7640 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7641 " %s" % (self.op.iallocator, ial.info),
7644 self.op.nodes = ial.result
7645 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7646 self.op.instance_name, self.op.iallocator,
7647 utils.CommaJoin(ial.result))
7649 def CheckArguments(self):
7650 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7651 # Normalize and convert deprecated list of disk indices
7652 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
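# For example, a deprecated value like [2, 0] is normalized here to
# [(0, {}), (2, {})]: indices sorted and paired with empty parameter dicts.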
7654 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7656 raise errors.OpPrereqError("Some disks have been specified more than"
7657 " once: %s" % utils.CommaJoin(duplicates),
7660 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7661 # when neither iallocator nor nodes are specified
7662 if self.op.iallocator or self.op.nodes:
7663 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7665 for (idx, params) in self.op.disks:
7666 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7667 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7669 raise errors.OpPrereqError("Parameters for disk %s try to change"
7670                                    " unmodifiable parameter(s): %s" %
7671 (idx, utils.CommaJoin(unsupported)),
7674 def ExpandNames(self):
7675 self._ExpandAndLockInstance()
7676 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7679 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7680 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7682 self.needed_locks[locking.LEVEL_NODE] = []
7683 if self.op.iallocator:
7684 # iallocator will select a new node in the same group
7685 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7686 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7688 self.needed_locks[locking.LEVEL_NODE_RES] = []
7690 def DeclareLocks(self, level):
7691 if level == locking.LEVEL_NODEGROUP:
7692 assert self.op.iallocator is not None
7693 assert not self.op.nodes
7694 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7695 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7696 # Lock the primary group used by the instance optimistically; this
7697 # requires going via the node before it's locked, requiring
7698 # verification later on
7699 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7700 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7702 elif level == locking.LEVEL_NODE:
7703 # If an allocator is used, then we lock all the nodes in the current
7704 # instance group, as we don't know yet which ones will be selected;
7705 # if we replace the nodes without using an allocator, locks are
7706 # already declared in ExpandNames; otherwise, we need to lock all the
7707 # instance nodes for disk re-creation
7708 if self.op.iallocator:
7709 assert not self.op.nodes
7710 assert not self.needed_locks[locking.LEVEL_NODE]
7711 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7713 # Lock member nodes of the group of the primary node
7714 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7715 self.needed_locks[locking.LEVEL_NODE].extend(
7716 self.cfg.GetNodeGroup(group_uuid).members)
7718 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7719 elif not self.op.nodes:
7720 self._LockInstancesNodes(primary_only=False)
7721 elif level == locking.LEVEL_NODE_RES:
7723 self.needed_locks[locking.LEVEL_NODE_RES] = \
7724 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7726 def BuildHooksEnv(self):
7729 This runs on master, primary and secondary nodes of the instance.
7732 return _BuildInstanceHookEnvByObject(self, self.instance)
7734 def BuildHooksNodes(self):
7735 """Build hooks nodes.
7738 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7741 def CheckPrereq(self):
7742 """Check prerequisites.
7744 This checks that the instance is in the cluster and is not running.
7747 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7748 assert instance is not None, \
7749 "Cannot retrieve locked instance %s" % self.op.instance_name
7751 if len(self.op.nodes) != len(instance.all_nodes):
7752 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7753 " %d replacement nodes were specified" %
7754 (instance.name, len(instance.all_nodes),
7755 len(self.op.nodes)),
7757 assert instance.disk_template != constants.DT_DRBD8 or \
7758 len(self.op.nodes) == 2
7759 assert instance.disk_template != constants.DT_PLAIN or \
7760 len(self.op.nodes) == 1
7761 primary_node = self.op.nodes[0]
7763 primary_node = instance.primary_node
7764 if not self.op.iallocator:
7765 _CheckNodeOnline(self, primary_node)
7767 if instance.disk_template == constants.DT_DISKLESS:
7768 raise errors.OpPrereqError("Instance '%s' has no disks" %
7769 self.op.instance_name, errors.ECODE_INVAL)
7771 # Verify if node group locks are still correct
7772 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7774 # Node group locks are acquired only for the primary node (and only
7775 # when the allocator is used)
7776 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7779 # if we replace nodes *and* the old primary is offline, we don't
7780 # check the instance state
7781 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7782 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7783 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7784 msg="cannot recreate disks")
7787 self.disks = dict(self.op.disks)
7789 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
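# When no disks are specified, every disk is recreated with unchanged
# parameters; e.g. a two-disk instance yields {0: {}, 1: {}}.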
7791 maxidx = max(self.disks.keys())
7792 if maxidx >= len(instance.disks):
7793 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7796 if ((self.op.nodes or self.op.iallocator) and
7797 sorted(self.disks.keys()) != range(len(instance.disks))):
7798 raise errors.OpPrereqError("Can't recreate disks partially and"
7799 " change the nodes at the same time",
7802 self.instance = instance
7804 if self.op.iallocator:
7805 self._RunAllocator()
7806 # Release unneeded node and node resource locks
7807 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7808 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7809 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7811 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7813 def Exec(self, feedback_fn):
7814 """Recreate the disks.
7817 instance = self.instance
7819 assert (self.owned_locks(locking.LEVEL_NODE) ==
7820 self.owned_locks(locking.LEVEL_NODE_RES))
7823 mods = [] # keeps track of needed changes
7825 for idx, disk in enumerate(instance.disks):
7827 changes = self.disks[idx]
7829 # Disk should not be recreated
7833 # update secondaries for disks, if needed
7834 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7835 # need to update the nodes and minors
7836 assert len(self.op.nodes) == 2
7837 assert len(disk.logical_id) == 6 # otherwise disk internals
7839 (_, _, old_port, _, _, old_secret) = disk.logical_id
7840 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7841 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7842 new_minors[0], new_minors[1], old_secret)
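# The DRBD logical_id is a 6-tuple (node_a, node_b, port, minor_a, minor_b,
# secret); only the nodes and minors are replaced here, the port and the
# shared secret are kept.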
7843 assert len(disk.logical_id) == len(new_id)
7847 mods.append((idx, new_id, changes))
7849 # now that we have passed all asserts above, we can apply the mods
7850 # in a single run (to avoid partial changes)
7851 for idx, new_id, changes in mods:
7852 disk = instance.disks[idx]
7853 if new_id is not None:
7854 assert disk.dev_type == constants.LD_DRBD8
7855 disk.logical_id = new_id
7857 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7858 mode=changes.get(constants.IDISK_MODE, None))
7860 # change primary node, if needed
7862 instance.primary_node = self.op.nodes[0]
7863 self.LogWarning("Changing the instance's nodes, you will have to"
7864 " remove any disks left on the older nodes manually")
7867 self.cfg.Update(instance, feedback_fn)
7869 # All touched nodes must be locked
7870 mylocks = self.owned_locks(locking.LEVEL_NODE)
7871 assert mylocks.issuperset(frozenset(instance.all_nodes))
7872 _CreateDisks(self, instance, to_skip=to_skip)
7875 class LUInstanceRename(LogicalUnit):
7876 """Rename an instance.
7879 HPATH = "instance-rename"
7880 HTYPE = constants.HTYPE_INSTANCE
7882 def CheckArguments(self):
7886 if self.op.ip_check and not self.op.name_check:
7887 # TODO: make the ip check more flexible and not depend on the name check
7888 raise errors.OpPrereqError("IP address check requires a name check",
7891 def BuildHooksEnv(self):
7894 This runs on master, primary and secondary nodes of the instance.
7897 env = _BuildInstanceHookEnvByObject(self, self.instance)
7898 env["INSTANCE_NEW_NAME"] = self.op.new_name
7901 def BuildHooksNodes(self):
7902 """Build hooks nodes.
7905 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7908 def CheckPrereq(self):
7909 """Check prerequisites.
7911 This checks that the instance is in the cluster and is not running.
7914 self.op.instance_name = _ExpandInstanceName(self.cfg,
7915 self.op.instance_name)
7916 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7917 assert instance is not None
7918 _CheckNodeOnline(self, instance.primary_node)
7919 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7920 msg="cannot rename")
7921 self.instance = instance
7923 new_name = self.op.new_name
7924 if self.op.name_check:
7925 hostname = _CheckHostnameSane(self, new_name)
7926 new_name = self.op.new_name = hostname.name
7927 if (self.op.ip_check and
7928 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7929 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7930 (hostname.ip, new_name),
7931 errors.ECODE_NOTUNIQUE)
7933 instance_list = self.cfg.GetInstanceList()
7934 if new_name in instance_list and new_name != instance.name:
7935 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7936 new_name, errors.ECODE_EXISTS)
7938 def Exec(self, feedback_fn):
7939 """Rename the instance.
7942 inst = self.instance
7943 old_name = inst.name
7945 rename_file_storage = False
7946 if (inst.disk_template in constants.DTS_FILEBASED and
7947 self.op.new_name != inst.name):
7948 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7949 rename_file_storage = True
7951 self.cfg.RenameInstance(inst.name, self.op.new_name)
7952 # Change the instance lock. This is definitely safe while we hold the BGL.
7953 # Otherwise the new lock would have to be added in acquired mode.
7955 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7956 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7957 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7959 # re-read the instance from the configuration after rename
7960 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7962 if rename_file_storage:
7963 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7964 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7965 old_file_storage_dir,
7966 new_file_storage_dir)
7967 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7968 " (but the instance has been renamed in Ganeti)" %
7969 (inst.primary_node, old_file_storage_dir,
7970 new_file_storage_dir))
7972 _StartInstanceDisks(self, inst, None)
7973 # update info on disks
7974 info = _GetInstanceInfoText(inst)
7975 for (idx, disk) in enumerate(inst.disks):
7976 for node in inst.all_nodes:
7977 self.cfg.SetDiskID(disk, node)
7978 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7980 self.LogWarning("Error setting info on node %s for disk %s: %s",
7981 node, idx, result.fail_msg)
7983 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7984 old_name, self.op.debug_level)
7985 msg = result.fail_msg
7987 msg = ("Could not run OS rename script for instance %s on node %s"
7988 " (but the instance has been renamed in Ganeti): %s" %
7989 (inst.name, inst.primary_node, msg))
7990 self.LogWarning(msg)
7992 _ShutdownInstanceDisks(self, inst)
7997 class LUInstanceRemove(LogicalUnit):
7998 """Remove an instance.
8001 HPATH = "instance-remove"
8002 HTYPE = constants.HTYPE_INSTANCE
8005 def ExpandNames(self):
8006 self._ExpandAndLockInstance()
8007 self.needed_locks[locking.LEVEL_NODE] = []
8008 self.needed_locks[locking.LEVEL_NODE_RES] = []
8009 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8011 def DeclareLocks(self, level):
8012 if level == locking.LEVEL_NODE:
8013 self._LockInstancesNodes()
8014 elif level == locking.LEVEL_NODE_RES:
8016 self.needed_locks[locking.LEVEL_NODE_RES] = \
8017 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8019 def BuildHooksEnv(self):
8022 This runs on master, primary and secondary nodes of the instance.
8025 env = _BuildInstanceHookEnvByObject(self, self.instance)
8026 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8029 def BuildHooksNodes(self):
8030 """Build hooks nodes.
8033 nl = [self.cfg.GetMasterNode()]
8034 nl_post = list(self.instance.all_nodes) + nl
8035 return (nl, nl_post)
8037 def CheckPrereq(self):
8038 """Check prerequisites.
8040 This checks that the instance is in the cluster.
8043 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8044 assert self.instance is not None, \
8045 "Cannot retrieve locked instance %s" % self.op.instance_name
8047 def Exec(self, feedback_fn):
8048 """Remove the instance.
8051 instance = self.instance
8052 logging.info("Shutting down instance %s on node %s",
8053 instance.name, instance.primary_node)
8055 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8056 self.op.shutdown_timeout)
8057 msg = result.fail_msg
8059 if self.op.ignore_failures:
8060 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8062 raise errors.OpExecError("Could not shutdown instance %s on"
8064 (instance.name, instance.primary_node, msg))
8066 assert (self.owned_locks(locking.LEVEL_NODE) ==
8067 self.owned_locks(locking.LEVEL_NODE_RES))
8068 assert not (set(instance.all_nodes) -
8069 self.owned_locks(locking.LEVEL_NODE)), \
8070 "Not owning correct locks"
8072 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8075 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8076 """Utility function to remove an instance.
8079 logging.info("Removing block devices for instance %s", instance.name)
8081 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8082 if not ignore_failures:
8083 raise errors.OpExecError("Can't remove instance's disks")
8084 feedback_fn("Warning: can't remove instance's disks")
8086 logging.info("Removing instance %s out of cluster config", instance.name)
8088 lu.cfg.RemoveInstance(instance.name)
8090 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8091 "Instance lock removal conflict"
8093 # Remove lock for the instance
8094 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8097 class LUInstanceQuery(NoHooksLU):
8098 """Logical unit for querying instances.
8101 # pylint: disable=W0142
8104 def CheckArguments(self):
8105 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8106 self.op.output_fields, self.op.use_locking)
8108 def ExpandNames(self):
8109 self.iq.ExpandNames(self)
8111 def DeclareLocks(self, level):
8112 self.iq.DeclareLocks(self, level)
8114 def Exec(self, feedback_fn):
8115 return self.iq.OldStyleQuery(self)
8118 def _ExpandNamesForMigration(lu):
8119 """Expands names for use with L{TLMigrateInstance}.
8121 @type lu: L{LogicalUnit}
8124 if lu.op.target_node is not None:
8125 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8127 lu.needed_locks[locking.LEVEL_NODE] = []
8128 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8130 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8131 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8133 # The node allocation lock is actually only needed for replicated instances
8134 # (e.g. DRBD8) and if an iallocator is used.
8135 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8138 def _DeclareLocksForMigration(lu, level):
8139 """Declares locks for L{TLMigrateInstance}.
8141 @type lu: L{LogicalUnit}
8142 @param level: Lock level
8145 if level == locking.LEVEL_NODE_ALLOC:
8146 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8148 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8150 # Node locks are already declared here rather than at LEVEL_NODE as we need
8151 # the instance object anyway to declare the node allocation lock.
8152 if instance.disk_template in constants.DTS_EXT_MIRROR:
8153 if lu.op.target_node is None:
8154 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8155 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8157 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8159 del lu.recalculate_locks[locking.LEVEL_NODE]
8161 lu._LockInstancesNodes() # pylint: disable=W0212
8163 elif level == locking.LEVEL_NODE:
8164 # Node locks are declared together with the node allocation lock
8165 assert (lu.needed_locks[locking.LEVEL_NODE] or
8166 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8168 elif level == locking.LEVEL_NODE_RES:
8170 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8171 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8174 class LUInstanceFailover(LogicalUnit):
8175 """Failover an instance.
8178 HPATH = "instance-failover"
8179 HTYPE = constants.HTYPE_INSTANCE
8182 def CheckArguments(self):
8183 """Check the arguments.
8186 self.iallocator = getattr(self.op, "iallocator", None)
8187 self.target_node = getattr(self.op, "target_node", None)
8189 def ExpandNames(self):
8190 self._ExpandAndLockInstance()
8191 _ExpandNamesForMigration(self)
8194 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8195 self.op.ignore_consistency, True,
8196 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8198 self.tasklets = [self._migrater]
8200 def DeclareLocks(self, level):
8201 _DeclareLocksForMigration(self, level)
8203 def BuildHooksEnv(self):
8206 This runs on master, primary and secondary nodes of the instance.
8209 instance = self._migrater.instance
8210 source_node = instance.primary_node
8211 target_node = self.op.target_node
8213 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8214 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8215 "OLD_PRIMARY": source_node,
8216 "NEW_PRIMARY": target_node,
8219 if instance.disk_template in constants.DTS_INT_MIRROR:
8220 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8221 env["NEW_SECONDARY"] = source_node
8223 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8225 env.update(_BuildInstanceHookEnvByObject(self, instance))
8229 def BuildHooksNodes(self):
8230 """Build hooks nodes.
8233 instance = self._migrater.instance
8234 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8235 return (nl, nl + [instance.primary_node])
8238 class LUInstanceMigrate(LogicalUnit):
8239 """Migrate an instance.
8241 This is migration without shutting down, compared to the failover,
8242 which is done with shutdown.
8245 HPATH = "instance-migrate"
8246 HTYPE = constants.HTYPE_INSTANCE
8249 def ExpandNames(self):
8250 self._ExpandAndLockInstance()
8251 _ExpandNamesForMigration(self)
8254 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8255 False, self.op.allow_failover, False,
8256 self.op.allow_runtime_changes,
8257 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8258 self.op.ignore_ipolicy)
8260 self.tasklets = [self._migrater]
8262 def DeclareLocks(self, level):
8263 _DeclareLocksForMigration(self, level)
8265 def BuildHooksEnv(self):
8268 This runs on master, primary and secondary nodes of the instance.
8271 instance = self._migrater.instance
8272 source_node = instance.primary_node
8273 target_node = self.op.target_node
8274 env = _BuildInstanceHookEnvByObject(self, instance)
8276 "MIGRATE_LIVE": self._migrater.live,
8277 "MIGRATE_CLEANUP": self.op.cleanup,
8278 "OLD_PRIMARY": source_node,
8279 "NEW_PRIMARY": target_node,
8280 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8283 if instance.disk_template in constants.DTS_INT_MIRROR:
8284 env["OLD_SECONDARY"] = target_node
8285 env["NEW_SECONDARY"] = source_node
8287 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8291 def BuildHooksNodes(self):
8292 """Build hooks nodes.
8295 instance = self._migrater.instance
8296 snodes = list(instance.secondary_nodes)
8297 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8301 class LUInstanceMove(LogicalUnit):
8302 """Move an instance by data-copying.
8305 HPATH = "instance-move"
8306 HTYPE = constants.HTYPE_INSTANCE
8309 def ExpandNames(self):
8310 self._ExpandAndLockInstance()
8311 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8312 self.op.target_node = target_node
8313 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8314 self.needed_locks[locking.LEVEL_NODE_RES] = []
8315 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8317 def DeclareLocks(self, level):
8318 if level == locking.LEVEL_NODE:
8319 self._LockInstancesNodes(primary_only=True)
8320 elif level == locking.LEVEL_NODE_RES:
8322 self.needed_locks[locking.LEVEL_NODE_RES] = \
8323 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8325 def BuildHooksEnv(self):
8328 This runs on master, primary and secondary nodes of the instance.
8332 "TARGET_NODE": self.op.target_node,
8333 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8335 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8338 def BuildHooksNodes(self):
8339 """Build hooks nodes.
8343 self.cfg.GetMasterNode(),
8344 self.instance.primary_node,
8345 self.op.target_node,
8349 def CheckPrereq(self):
8350 """Check prerequisites.
8352 This checks that the instance is in the cluster.
8355 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8356 assert self.instance is not None, \
8357 "Cannot retrieve locked instance %s" % self.op.instance_name
8359 node = self.cfg.GetNodeInfo(self.op.target_node)
8360 assert node is not None, \
8361 "Cannot retrieve locked node %s" % self.op.target_node
8363 self.target_node = target_node = node.name
8365 if target_node == instance.primary_node:
8366 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8367 (instance.name, target_node),
8370 bep = self.cfg.GetClusterInfo().FillBE(instance)
8372 for idx, dsk in enumerate(instance.disks):
8373 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8374 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8375 " cannot copy" % idx, errors.ECODE_STATE)
8377 _CheckNodeOnline(self, target_node)
8378 _CheckNodeNotDrained(self, target_node)
8379 _CheckNodeVmCapable(self, target_node)
8380 cluster = self.cfg.GetClusterInfo()
8381 group_info = self.cfg.GetNodeGroup(node.group)
8382 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8383 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8384 ignore=self.op.ignore_ipolicy)
8386 if instance.admin_state == constants.ADMINST_UP:
8387       # check memory requirements on the target node
8388 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8389 instance.name, bep[constants.BE_MAXMEM],
8390 instance.hypervisor)
8392 self.LogInfo("Not checking memory on the secondary node as"
8393 " instance will not be started")
8395     # check bridge existence
8396 _CheckInstanceBridgesExist(self, instance, node=target_node)
8398 def Exec(self, feedback_fn):
8399 """Move an instance.
8401 The move is done by shutting it down on its present node, copying
8402 the data over (slow) and starting it on the new node.
8405 instance = self.instance
8407 source_node = instance.primary_node
8408 target_node = self.target_node
8410 self.LogInfo("Shutting down instance %s on source node %s",
8411 instance.name, source_node)
8413 assert (self.owned_locks(locking.LEVEL_NODE) ==
8414 self.owned_locks(locking.LEVEL_NODE_RES))
8416 result = self.rpc.call_instance_shutdown(source_node, instance,
8417 self.op.shutdown_timeout)
8418 msg = result.fail_msg
8420 if self.op.ignore_consistency:
8421 self.LogWarning("Could not shutdown instance %s on node %s."
8422 " Proceeding anyway. Please make sure node"
8423 " %s is down. Error details: %s",
8424 instance.name, source_node, source_node, msg)
8426 raise errors.OpExecError("Could not shutdown instance %s on"
8428 (instance.name, source_node, msg))
8430 # create the target disks
8432 _CreateDisks(self, instance, target_node=target_node)
8433 except errors.OpExecError:
8434 self.LogWarning("Device creation failed, reverting...")
8436 _RemoveDisks(self, instance, target_node=target_node)
8438 self.cfg.ReleaseDRBDMinors(instance.name)
8441 cluster_name = self.cfg.GetClusterInfo().cluster_name
8444 # activate, get path, copy the data over
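# For each disk: assemble it on the target node to get a device path, then
# have the source node export (copy) the data to that path; failures are
# collected in errs and roll the operation back further down.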
8445 for idx, disk in enumerate(instance.disks):
8446 self.LogInfo("Copying data for disk %d", idx)
8447 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8448 instance.name, True, idx)
8450 self.LogWarning("Can't assemble newly created disk %d: %s",
8451 idx, result.fail_msg)
8452 errs.append(result.fail_msg)
8454 dev_path = result.payload
8455 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8456 target_node, dev_path,
8459 self.LogWarning("Can't copy data over for disk %d: %s",
8460 idx, result.fail_msg)
8461 errs.append(result.fail_msg)
8465 self.LogWarning("Some disks failed to copy, aborting")
8467 _RemoveDisks(self, instance, target_node=target_node)
8469 self.cfg.ReleaseDRBDMinors(instance.name)
8470 raise errors.OpExecError("Errors during disk copy: %s" %
8473 instance.primary_node = target_node
8474 self.cfg.Update(instance, feedback_fn)
8476 self.LogInfo("Removing the disks on the original node")
8477 _RemoveDisks(self, instance, target_node=source_node)
8479 # Only start the instance if it's marked as up
8480 if instance.admin_state == constants.ADMINST_UP:
8481 self.LogInfo("Starting instance %s on node %s",
8482 instance.name, target_node)
8484 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8485 ignore_secondaries=True)
8487 _ShutdownInstanceDisks(self, instance)
8488 raise errors.OpExecError("Can't activate the instance's disks")
8490 result = self.rpc.call_instance_start(target_node,
8491 (instance, None, None), False)
8492 msg = result.fail_msg
8494 _ShutdownInstanceDisks(self, instance)
8495 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8496 (instance.name, target_node, msg))
8499 class LUNodeMigrate(LogicalUnit):
8500 """Migrate all instances from a node.
8503 HPATH = "node-migrate"
8504 HTYPE = constants.HTYPE_NODE
8507 def CheckArguments(self):
8510 def ExpandNames(self):
8511 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8513 self.share_locks = _ShareAll()
8514 self.needed_locks = {
8515 locking.LEVEL_NODE: [self.op.node_name],
8518 def BuildHooksEnv(self):
8521 This runs on the master, the primary and all the secondaries.
8525 "NODE_NAME": self.op.node_name,
8526 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8529 def BuildHooksNodes(self):
8530 """Build hooks nodes.
8533 nl = [self.cfg.GetMasterNode()]
8536 def CheckPrereq(self):
8539 def Exec(self, feedback_fn):
8540     # Prepare jobs for migrating instances
8541 allow_runtime_changes = self.op.allow_runtime_changes
8543 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8546 iallocator=self.op.iallocator,
8547 target_node=self.op.target_node,
8548 allow_runtime_changes=allow_runtime_changes,
8549 ignore_ipolicy=self.op.ignore_ipolicy)]
8550 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
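# One single-opcode job per primary instance on the node, so the individual
# migrations are submitted and can succeed or fail independently.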
8552 # TODO: Run iallocator in this opcode and pass correct placement options to
8553 # OpInstanceMigrate. Since other jobs can modify the cluster between
8554 # running the iallocator and the actual migration, a good consistency model
8555 # will have to be found.
8557 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8558 frozenset([self.op.node_name]))
8560 return ResultWithJobs(jobs)
8563 class TLMigrateInstance(Tasklet):
8564 """Tasklet class for instance migration.
8567 @ivar live: whether the migration will be done live or non-live;
8568       this variable is initialized only after CheckPrereq has run
8569 @type cleanup: boolean
8570   @ivar cleanup: Whether we clean up from a failed migration
8571 @type iallocator: string
8572 @ivar iallocator: The iallocator used to determine target_node
8573 @type target_node: string
8574 @ivar target_node: If given, the target_node to reallocate the instance to
8575 @type failover: boolean
8576 @ivar failover: Whether operation results in failover or migration
8577 @type fallback: boolean
8578 @ivar fallback: Whether fallback to failover is allowed if migration not
8580 @type ignore_consistency: boolean
8581   @ivar ignore_consistency: Whether we should ignore consistency between source
8583 @type shutdown_timeout: int
8584   @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8585 @type ignore_ipolicy: bool
8586 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8591 _MIGRATION_POLL_INTERVAL = 1 # seconds
8592 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
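# These intervals drive the polling loop in _ExecMigration: the migration
# status is queried every _MIGRATION_POLL_INTERVAL seconds and progress is
# reported at most every _MIGRATION_FEEDBACK_INTERVAL seconds.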
8594 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8595 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8597 """Initializes this class.
8600 Tasklet.__init__(self, lu)
8603 self.instance_name = instance_name
8604 self.cleanup = cleanup
8605 self.live = False # will be overridden later
8606 self.failover = failover
8607 self.fallback = fallback
8608 self.ignore_consistency = ignore_consistency
8609 self.shutdown_timeout = shutdown_timeout
8610 self.ignore_ipolicy = ignore_ipolicy
8611 self.allow_runtime_changes = allow_runtime_changes
8613 def CheckPrereq(self):
8614 """Check prerequisites.
8616 This checks that the instance is in the cluster.
8619 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8620 instance = self.cfg.GetInstanceInfo(instance_name)
8621 assert instance is not None
8622 self.instance = instance
8623 cluster = self.cfg.GetClusterInfo()
8625 if (not self.cleanup and
8626 not instance.admin_state == constants.ADMINST_UP and
8627 not self.failover and self.fallback):
8628 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8629 " switching to failover")
8630 self.failover = True
8632 if instance.disk_template not in constants.DTS_MIRRORED:
8637 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8638 " %s" % (instance.disk_template, text),
8641 if instance.disk_template in constants.DTS_EXT_MIRROR:
8642 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8644 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8646 if self.lu.op.iallocator:
8647 self._RunAllocator()
8649       # We set self.target_node as it is required by
8651 self.target_node = self.lu.op.target_node
8653 # Check that the target node is correct in terms of instance policy
8654 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8655 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8656 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8658 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8659 ignore=self.ignore_ipolicy)
8661 # self.target_node is already populated, either directly or by the
8663 target_node = self.target_node
8664 if self.target_node == instance.primary_node:
8665 raise errors.OpPrereqError("Cannot migrate instance %s"
8666 " to its primary (%s)" %
8667 (instance.name, instance.primary_node),
8670 if len(self.lu.tasklets) == 1:
8671 # It is safe to release locks only when we're the only tasklet
8673 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8674 keep=[instance.primary_node, self.target_node])
8675 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8678 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8680 secondary_nodes = instance.secondary_nodes
8681 if not secondary_nodes:
8682 raise errors.ConfigurationError("No secondary node but using"
8683 " %s disk template" %
8684 instance.disk_template)
8685 target_node = secondary_nodes[0]
8686 if self.lu.op.iallocator or (self.lu.op.target_node and
8687 self.lu.op.target_node != target_node):
8689 text = "failed over"
8692 raise errors.OpPrereqError("Instances with disk template %s cannot"
8693 " be %s to arbitrary nodes"
8694 " (neither an iallocator nor a target"
8695 " node can be passed)" %
8696 (instance.disk_template, text),
8698 nodeinfo = self.cfg.GetNodeInfo(target_node)
8699 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8700 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8702 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8703 ignore=self.ignore_ipolicy)
8705 i_be = cluster.FillBE(instance)
8707 # check memory requirements on the secondary node
8708 if (not self.cleanup and
8709 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8710 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8711 "migrating instance %s" %
8713 i_be[constants.BE_MINMEM],
8714 instance.hypervisor)
8716 self.lu.LogInfo("Not checking memory on the secondary node as"
8717 " instance will not be started")
8719 # check if failover must be forced instead of migration
8720 if (not self.cleanup and not self.failover and
8721 i_be[constants.BE_ALWAYS_FAILOVER]):
8722 self.lu.LogInfo("Instance configured to always failover; fallback"
8724 self.failover = True
8726     # check bridge existence
8727 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8729 if not self.cleanup:
8730 _CheckNodeNotDrained(self.lu, target_node)
8731 if not self.failover:
8732 result = self.rpc.call_instance_migratable(instance.primary_node,
8734 if result.fail_msg and self.fallback:
8735 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8737 self.failover = True
8739 result.Raise("Can't migrate, please use failover",
8740 prereq=True, ecode=errors.ECODE_STATE)
8742 assert not (self.failover and self.cleanup)
8744 if not self.failover:
8745 if self.lu.op.live is not None and self.lu.op.mode is not None:
8746 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8747 " parameters are accepted",
8749 if self.lu.op.live is not None:
8751 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8753 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8754 # reset the 'live' parameter to None so that repeated
8755 # invocations of CheckPrereq do not raise an exception
8756 self.lu.op.live = None
8757 elif self.lu.op.mode is None:
8758 # read the default value from the hypervisor
8759 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8760 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8762 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8764 # Failover is never live
8767 if not (self.failover or self.cleanup):
8768 remote_info = self.rpc.call_instance_info(instance.primary_node,
8770 instance.hypervisor)
8771 remote_info.Raise("Error checking instance on node %s" %
8772 instance.primary_node)
8773 instance_running = bool(remote_info.payload)
8774 if instance_running:
8775 self.current_mem = int(remote_info.payload["memory"])
8777 def _RunAllocator(self):
8778 """Run the allocator based on input opcode.
8781 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8783 # FIXME: add a self.ignore_ipolicy option
8784 req = iallocator.IAReqRelocate(name=self.instance_name,
8785 relocate_from=[self.instance.primary_node])
8786 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8788 ial.Run(self.lu.op.iallocator)
8791 raise errors.OpPrereqError("Can't compute nodes using"
8792 " iallocator '%s': %s" %
8793 (self.lu.op.iallocator, ial.info),
8795 self.target_node = ial.result[0]
8796 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8797 self.instance_name, self.lu.op.iallocator,
8798 utils.CommaJoin(ial.result))
8800 def _WaitUntilSync(self):
8801 """Poll with custom rpc for disk sync.
8803 This uses our own step-based rpc call.
8806 self.feedback_fn("* wait until resync is done")
8810 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8812 (self.instance.disks,
8815 for node, nres in result.items():
8816 nres.Raise("Cannot resync disks on node %s" % node)
8817 node_done, node_percent = nres.payload
8818 all_done = all_done and node_done
8819 if node_percent is not None:
8820 min_percent = min(min_percent, node_percent)
8822 if min_percent < 100:
8823 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8826 def _EnsureSecondary(self, node):
8827 """Demote a node to secondary.
8830 self.feedback_fn("* switching node %s to secondary mode" % node)
8832 for dev in self.instance.disks:
8833 self.cfg.SetDiskID(dev, node)
8835 result = self.rpc.call_blockdev_close(node, self.instance.name,
8836 self.instance.disks)
8837 result.Raise("Cannot change disk to secondary on node %s" % node)
8839 def _GoStandalone(self):
8840 """Disconnect from the network.
8843 self.feedback_fn("* changing into standalone mode")
8844 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8845 self.instance.disks)
8846 for node, nres in result.items():
8847 nres.Raise("Cannot disconnect disks node %s" % node)
8849 def _GoReconnect(self, multimaster):
8850 """Reconnect to the network.
8856 msg = "single-master"
8857 self.feedback_fn("* changing disks into %s mode" % msg)
8858 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8859 (self.instance.disks, self.instance),
8860 self.instance.name, multimaster)
8861 for node, nres in result.items():
8862 nres.Raise("Cannot change disks config on node %s" % node)
8864 def _ExecCleanup(self):
8865 """Try to cleanup after a failed migration.
8867 The cleanup is done by:
8868 - check that the instance is running only on one node
8869 (and update the config if needed)
8870 - change disks on its secondary node to secondary
8871 - wait until disks are fully synchronized
8872 - disconnect from the network
8873 - change disks into single-master mode
8874 - wait again until disks are fully synchronized
8877 instance = self.instance
8878 target_node = self.target_node
8879 source_node = self.source_node
8881 # check running on only one node
8882 self.feedback_fn("* checking where the instance actually runs"
8883 " (if this hangs, the hypervisor might be in"
8885 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8886 for node, result in ins_l.items():
8887 result.Raise("Can't contact node %s" % node)
8889 runningon_source = instance.name in ins_l[source_node].payload
8890 runningon_target = instance.name in ins_l[target_node].payload
8892 if runningon_source and runningon_target:
8893 raise errors.OpExecError("Instance seems to be running on two nodes,"
8894 " or the hypervisor is confused; you will have"
8895 " to ensure manually that it runs only on one"
8896 " and restart this operation")
8898 if not (runningon_source or runningon_target):
8899 raise errors.OpExecError("Instance does not seem to be running at all;"
8900 " in this case it's safer to repair by"
8901 " running 'gnt-instance stop' to ensure disk"
8902 " shutdown, and then restarting it")
8904 if runningon_target:
8905 # the migration has actually succeeded, we need to update the config
8906 self.feedback_fn("* instance running on secondary node (%s),"
8907 " updating config" % target_node)
8908 instance.primary_node = target_node
8909 self.cfg.Update(instance, self.feedback_fn)
8910 demoted_node = source_node
8912 self.feedback_fn("* instance confirmed to be running on its"
8913 " primary node (%s)" % source_node)
8914 demoted_node = target_node
8916 if instance.disk_template in constants.DTS_INT_MIRROR:
8917 self._EnsureSecondary(demoted_node)
8919 self._WaitUntilSync()
8920 except errors.OpExecError:
8921         # we ignore errors here, since if the device is standalone, it
8922 # won't be able to sync
8924 self._GoStandalone()
8925 self._GoReconnect(False)
8926 self._WaitUntilSync()
8928 self.feedback_fn("* done")
8930 def _RevertDiskStatus(self):
8931 """Try to revert the disk status after a failed migration.
8934 target_node = self.target_node
8935 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8939 self._EnsureSecondary(target_node)
8940 self._GoStandalone()
8941 self._GoReconnect(False)
8942 self._WaitUntilSync()
8943 except errors.OpExecError, err:
8944 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8945 " please try to recover the instance manually;"
8946 " error '%s'" % str(err))
8948 def _AbortMigration(self):
8949 """Call the hypervisor code to abort a started migration.
8952 instance = self.instance
8953 target_node = self.target_node
8954 source_node = self.source_node
8955 migration_info = self.migration_info
8957 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8961 abort_msg = abort_result.fail_msg
8963 logging.error("Aborting migration failed on target node %s: %s",
8964 target_node, abort_msg)
8965       # Don't raise an exception here, as we still have to try to revert the
8966 # disk status, even if this step failed.
8968 abort_result = self.rpc.call_instance_finalize_migration_src(
8969 source_node, instance, False, self.live)
8970 abort_msg = abort_result.fail_msg
8972 logging.error("Aborting migration failed on source node %s: %s",
8973 source_node, abort_msg)
8975 def _ExecMigration(self):
8976 """Migrate an instance.
8978 The migrate is done by:
8979 - change the disks into dual-master mode
8980 - wait until disks are fully synchronized again
8981 - migrate the instance
8982 - change disks on the new secondary node (the old primary) to secondary
8983 - wait until disks are fully synchronized
8984 - change disks into single-master mode
8987 instance = self.instance
8988 target_node = self.target_node
8989 source_node = self.source_node
8991 # Check for hypervisor version mismatch and warn the user.
8992 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8993 None, [self.instance.hypervisor], False)
8994 for ninfo in nodeinfo.values():
8995 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8997 (_, _, (src_info, )) = nodeinfo[source_node].payload
8998 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9000 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9001 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9002 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9003 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9004 if src_version != dst_version:
9005 self.feedback_fn("* warning: hypervisor version mismatch between"
9006 " source (%s) and target (%s) node" %
9007 (src_version, dst_version))
9009 self.feedback_fn("* checking disk consistency between source and target")
9010 for (idx, dev) in enumerate(instance.disks):
9011 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9012 raise errors.OpExecError("Disk %s is degraded or not fully"
9013 " synchronized on target node,"
9014 " aborting migration" % idx)
9016 if self.current_mem > self.tgt_free_mem:
9017 if not self.allow_runtime_changes:
9018 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9019 " free memory to fit instance %s on target"
9020 " node %s (have %dMB, need %dMB)" %
9021 (instance.name, target_node,
9022 self.tgt_free_mem, self.current_mem))
9023 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9024 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9027 rpcres.Raise("Cannot modify instance runtime memory")
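# E.g. an instance currently using 4096 MB with only 2048 MB free on the
# target is ballooned down to 2048 MB before the transfer (provided runtime
# changes are allowed; otherwise the OpExecError above is raised).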
9029 # First get the migration information from the remote node
9030 result = self.rpc.call_migration_info(source_node, instance)
9031 msg = result.fail_msg
9033 log_err = ("Failed fetching source migration information from %s: %s" %
9035 logging.error(log_err)
9036 raise errors.OpExecError(log_err)
9038 self.migration_info = migration_info = result.payload
9040 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9041 # Then switch the disks to master/master mode
9042 self._EnsureSecondary(target_node)
9043 self._GoStandalone()
9044 self._GoReconnect(True)
9045 self._WaitUntilSync()
9047 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9048 result = self.rpc.call_accept_instance(target_node,
9051 self.nodes_ip[target_node])
9053 msg = result.fail_msg
9055 logging.error("Instance pre-migration failed, trying to revert"
9056 " disk status: %s", msg)
9057 self.feedback_fn("Pre-migration failed, aborting")
9058 self._AbortMigration()
9059 self._RevertDiskStatus()
9060 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9061 (instance.name, msg))
9063 self.feedback_fn("* migrating instance to %s" % target_node)
9064 result = self.rpc.call_instance_migrate(source_node, instance,
9065 self.nodes_ip[target_node],
9067 msg = result.fail_msg
9069 logging.error("Instance migration failed, trying to revert"
9070 " disk status: %s", msg)
9071 self.feedback_fn("Migration failed, aborting")
9072 self._AbortMigration()
9073 self._RevertDiskStatus()
9074 raise errors.OpExecError("Could not migrate instance %s: %s" %
9075 (instance.name, msg))
9077 self.feedback_fn("* starting memory transfer")
9078 last_feedback = time.time()
9080 result = self.rpc.call_instance_get_migration_status(source_node,
9082 msg = result.fail_msg
9083 ms = result.payload # MigrationStatus instance
9084 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9085 logging.error("Instance migration failed, trying to revert"
9086 " disk status: %s", msg)
9087 self.feedback_fn("Migration failed, aborting")
9088 self._AbortMigration()
9089 self._RevertDiskStatus()
9091 msg = "hypervisor returned failure"
9092 raise errors.OpExecError("Could not migrate instance %s: %s" %
9093 (instance.name, msg))
9095 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9096 self.feedback_fn("* memory transfer complete")
9099 if (utils.TimeoutExpired(last_feedback,
9100 self._MIGRATION_FEEDBACK_INTERVAL) and
9101 ms.transferred_ram is not None):
9102 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9103 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9104 last_feedback = time.time()
9106 time.sleep(self._MIGRATION_POLL_INTERVAL)
9108 result = self.rpc.call_instance_finalize_migration_src(source_node,
9112 msg = result.fail_msg
9114 logging.error("Instance migration succeeded, but finalization failed"
9115 " on the source node: %s", msg)
9116 raise errors.OpExecError("Could not finalize instance migration: %s" %
9119 instance.primary_node = target_node
9121 # distribute new instance config to the other nodes
9122 self.cfg.Update(instance, self.feedback_fn)
9124 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9128 msg = result.fail_msg
9130 logging.error("Instance migration succeeded, but finalization failed"
9131 " on the target node: %s", msg)
9132 raise errors.OpExecError("Could not finalize instance migration: %s" %
9135 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9136 self._EnsureSecondary(source_node)
9137 self._WaitUntilSync()
9138 self._GoStandalone()
9139 self._GoReconnect(False)
9140 self._WaitUntilSync()
9142 # If the instance's disk template is `rbd' or `ext' and there was a
9143 # successful migration, unmap the device from the source node.
9144 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9145 disks = _ExpandCheckDisks(instance, instance.disks)
9146 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9148 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9149 msg = result.fail_msg
9151 logging.error("Migration was successful, but couldn't unmap the"
9152 " block device %s on source node %s: %s",
9153 disk.iv_name, source_node, msg)
9154 logging.error("You need to unmap the device %s manually on %s",
9155 disk.iv_name, source_node)
9157 self.feedback_fn("* done")
9159 def _ExecFailover(self):
9160 """Failover an instance.
9162 The failover is done by shutting it down on its present node and
9163 starting it on the secondary.
9166 instance = self.instance
9167 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9169 source_node = instance.primary_node
9170 target_node = self.target_node
9172 if instance.admin_state == constants.ADMINST_UP:
9173 self.feedback_fn("* checking disk consistency between source and target")
9174 for (idx, dev) in enumerate(instance.disks):
9175 # for drbd, these are drbd over lvm
9176 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9178 if primary_node.offline:
9179 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9181 (primary_node.name, idx, target_node))
9182 elif not self.ignore_consistency:
9183 raise errors.OpExecError("Disk %s is degraded on target node,"
9184 " aborting failover" % idx)
9186 self.feedback_fn("* not checking disk consistency as instance is not"
9189 self.feedback_fn("* shutting down instance on source node")
9190 logging.info("Shutting down instance %s on node %s",
9191 instance.name, source_node)
9193 result = self.rpc.call_instance_shutdown(source_node, instance,
9194 self.shutdown_timeout)
9195 msg = result.fail_msg
9197 if self.ignore_consistency or primary_node.offline:
9198 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9199 " proceeding anyway; please make sure node"
9200 " %s is down; error details: %s",
9201 instance.name, source_node, source_node, msg)
9203 raise errors.OpExecError("Could not shutdown instance %s on"
9205 (instance.name, source_node, msg))
9207 self.feedback_fn("* deactivating the instance's disks on source node")
9208 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9209 raise errors.OpExecError("Can't shut down the instance's disks")
9211 instance.primary_node = target_node
9212 # distribute new instance config to the other nodes
9213 self.cfg.Update(instance, self.feedback_fn)
9215 # Only start the instance if it's marked as up
9216 if instance.admin_state == constants.ADMINST_UP:
9217 self.feedback_fn("* activating the instance's disks on target node %s" %
9219 logging.info("Starting instance %s on node %s",
9220 instance.name, target_node)
9222 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9223 ignore_secondaries=True)
9225 _ShutdownInstanceDisks(self.lu, instance)
9226 raise errors.OpExecError("Can't activate the instance's disks")
9228 self.feedback_fn("* starting the instance on the target node %s" %
9230 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9232 msg = result.fail_msg
9234 _ShutdownInstanceDisks(self.lu, instance)
9235 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9236 (instance.name, target_node, msg))
9238 def Exec(self, feedback_fn):
9239 """Perform the migration.
9242 self.feedback_fn = feedback_fn
9243 self.source_node = self.instance.primary_node
9245 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9246 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9247 self.target_node = self.instance.secondary_nodes[0]
9248 # Otherwise self.target_node has been populated either
9249 # directly, or through an iallocator.
9251 self.all_nodes = [self.source_node, self.target_node]
9252 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9253 in self.cfg.GetMultiNodeInfo(self.all_nodes))
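# nodes_ip maps node names to their secondary (replication network) IPs;
# the DRBD and migration RPCs issued by this tasklet use these addresses
# instead of the primary ones.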
9256 feedback_fn("Failover instance %s" % self.instance.name)
9257 self._ExecFailover()
9259 feedback_fn("Migrating instance %s" % self.instance.name)
9262 return self._ExecCleanup()
9264 return self._ExecMigration()
9267 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9269 """Wrapper around L{_CreateBlockDevInner}.
9271 This method annotates the root device first.
9274 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9275 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9276 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9277 force_open, excl_stor)
9280 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9281 info, force_open, excl_stor):
9282 """Create a tree of block devices on a given node.
9284 If this device type has to be created on secondaries, create it and all of its children.
9287 If not, just recurse to children keeping the same 'force' value.
9289 @attention: The device has to be annotated already.
9291 @param lu: the lu on whose behalf we execute
9292 @param node: the node on which to create the device
9293 @type instance: L{objects.Instance}
9294 @param instance: the instance which owns the device
9295 @type device: L{objects.Disk}
9296 @param device: the device to create
9297 @type force_create: boolean
9298 @param force_create: whether to force creation of this device; this
9299 will be changed to True whenever we find a device which has
9300 the CreateOnSecondary() attribute
9301 @param info: the extra 'metadata' we should attach to the device
9302 (this will be represented as a LVM tag)
9303 @type force_open: boolean
9304 @param force_open: this parameter will be passed to the
9305 L{backend.BlockdevCreate} function where it specifies
9306 whether we run on primary or not, and it affects both
9307 the child assembly and the device's own Open() execution
9308 @type excl_stor: boolean
9309 @param excl_stor: Whether exclusive_storage is active for the node
9312 if device.CreateOnSecondary():
9316 for child in device.children:
9317 _CreateBlockDevInner(lu, node, instance, child, force_create,
9318 info, force_open, excl_stor)
9320 if not force_create:
9323 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9327 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9329 """Create a single block device on a given node.
9331 This will not recurse over children of the device, so they must be created in advance.
9334 @param lu: the lu on whose behalf we execute
9335 @param node: the node on which to create the device
9336 @type instance: L{objects.Instance}
9337 @param instance: the instance which owns the device
9338 @type device: L{objects.Disk}
9339 @param device: the device to create
9340 @param info: the extra 'metadata' we should attach to the device
9341 (this will be represented as a LVM tag)
9342 @type force_open: boolean
9343 @param force_open: this parameter will be passed to the
9344 L{backend.BlockdevCreate} function where it specifies
9345 whether we run on primary or not, and it affects both
9346 the child assembly and the device's own Open() execution
9347 @type excl_stor: boolean
9348 @param excl_stor: Whether exclusive_storage is active for the node
9351 lu.cfg.SetDiskID(device, node)
9352 result = lu.rpc.call_blockdev_create(node, device, device.size,
9353 instance.name, force_open, info,
9355 result.Raise("Can't create block device %s on"
9356 " node %s for instance %s" % (device, node, instance.name))
9357 if device.physical_id is None:
9358 device.physical_id = result.payload
9361 def _GenerateUniqueNames(lu, exts):
9362 """Generate a suitable LV name.
9364 This will generate logical volume names for the given instance.
9369 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9370 results.append("%s%s" % (new_id, val))
9374 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9375 iv_name, p_minor, s_minor):
9376 """Generate a drbd8 device complete with its children.
9379 assert len(vgnames) == len(names) == 2
9380 port = lu.cfg.AllocatePort()
9381 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9383 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9384 logical_id=(vgnames[0], names[0]),
9386 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9387 size=constants.DRBD_META_SIZE,
9388 logical_id=(vgnames[1], names[1]),
9390 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9391 logical_id=(primary, secondary, port,
9394 children=[dev_data, dev_meta],
9395 iv_name=iv_name, params={})
9399 _DISK_TEMPLATE_NAME_PREFIX = {
9400 constants.DT_PLAIN: "",
9401 constants.DT_RBD: ".rbd",
9402 constants.DT_EXT: ".ext",
9406 _DISK_TEMPLATE_DEVICE_TYPE = {
9407 constants.DT_PLAIN: constants.LD_LV,
9408 constants.DT_FILE: constants.LD_FILE,
9409 constants.DT_SHARED_FILE: constants.LD_FILE,
9410 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9411 constants.DT_RBD: constants.LD_RBD,
9412 constants.DT_EXT: constants.LD_EXT,
9416 def _GenerateDiskTemplate(
9417 lu, template_name, instance_name, primary_node, secondary_nodes,
9418 disk_info, file_storage_dir, file_driver, base_index,
9419 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9420 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9421 """Generate the entire disk layout for a given template type.
9424 vgname = lu.cfg.GetVGName()
9425 disk_count = len(disk_info)
9428 if template_name == constants.DT_DISKLESS:
9430 elif template_name == constants.DT_DRBD8:
9431 if len(secondary_nodes) != 1:
9432 raise errors.ProgrammerError("Wrong template configuration")
9433 remote_node = secondary_nodes[0]
9434 minors = lu.cfg.AllocateDRBDMinor(
9435 [primary_node, remote_node] * len(disk_info), instance_name)
9437 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9439 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9442 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9443 for i in range(disk_count)]):
9444 names.append(lv_prefix + "_data")
9445 names.append(lv_prefix + "_meta")
9446 for idx, disk in enumerate(disk_info):
9447 disk_index = idx + base_index
9448 data_vg = disk.get(constants.IDISK_VG, vgname)
9449 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9450 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9451 disk[constants.IDISK_SIZE],
9453 names[idx * 2:idx * 2 + 2],
9454 "disk/%d" % disk_index,
9455 minors[idx * 2], minors[idx * 2 + 1])
9456 disk_dev.mode = disk[constants.IDISK_MODE]
9457 disks.append(disk_dev)
9460 raise errors.ProgrammerError("Wrong template configuration")
9462 if template_name == constants.DT_FILE:
9464 elif template_name == constants.DT_SHARED_FILE:
9465 _req_shr_file_storage()
9467 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9468 if name_prefix is None:
9471 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9472 (name_prefix, base_index + i)
9473 for i in range(disk_count)])
9475 if template_name == constants.DT_PLAIN:
9477 def logical_id_fn(idx, _, disk):
9478 vg = disk.get(constants.IDISK_VG, vgname)
9479 return (vg, names[idx])
9481 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9483 lambda _, disk_index, disk: (file_driver,
9484 "%s/disk%d" % (file_storage_dir,
9486 elif template_name == constants.DT_BLOCK:
9488 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9489 disk[constants.IDISK_ADOPT])
9490 elif template_name == constants.DT_RBD:
9491 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9492 elif template_name == constants.DT_EXT:
9493 def logical_id_fn(idx, _, disk):
9494 provider = disk.get(constants.IDISK_PROVIDER, None)
9495 if provider is None:
9496 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9497 " not found" % (constants.DT_EXT,
9498 constants.IDISK_PROVIDER))
9499 return (provider, names[idx])
9501 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9503 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9505 for idx, disk in enumerate(disk_info):
9507 # Only for the Ext template add disk_info to params
9508 if template_name == constants.DT_EXT:
9509 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9511 if key not in constants.IDISK_PARAMS:
9512 params[key] = disk[key]
9513 disk_index = idx + base_index
9514 size = disk[constants.IDISK_SIZE]
9515 feedback_fn("* disk %s, size %s" %
9516 (disk_index, utils.FormatUnit(size, "h")))
9517 disks.append(objects.Disk(dev_type=dev_type, size=size,
9518 logical_id=logical_id_fn(idx, disk_index, disk),
9519 iv_name="disk/%d" % disk_index,
9520 mode=disk[constants.IDISK_MODE],
9526 def _GetInstanceInfoText(instance):
9527 """Compute that text that should be added to the disk's metadata.
9530 return "originstname+%s" % instance.name
9533 def _CalcEta(time_taken, written, total_size):
9534 """Calculates the ETA based on size written and total size.
9536 @param time_taken: The time taken so far
9537 @param written: amount written so far
9538 @param total_size: The total size of data to be written
9539 @return: The remaining time in seconds
9542 avg_time = time_taken / float(written)
9543 return (total_size - written) * avg_time
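# Rough usage sketch (hypothetical numbers): with 512 MiB of 2048 MiB written
# in 30 seconds, _CalcEta(30.0, 512, 2048) returns
# (2048 - 512) * (30.0 / 512) = 90.0 seconds remaining.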
9546 def _WipeDisks(lu, instance, disks=None):
9547 """Wipes instance disks.
9549 @type lu: L{LogicalUnit}
9550 @param lu: the logical unit on whose behalf we execute
9551 @type instance: L{objects.Instance}
9552 @param instance: the instance whose disks we should wipe
9553 @return: the success of the wipe
9556 node = instance.primary_node
9559 disks = [(idx, disk, 0)
9560 for (idx, disk) in enumerate(instance.disks)]
9562 for (_, device, _) in disks:
9563 lu.cfg.SetDiskID(device, node)
9565 logging.info("Pausing synchronization of disks of instance '%s'",
9567 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9568 (map(compat.snd, disks),
9571 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9573 for idx, success in enumerate(result.payload):
9575 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9576 " failed", idx, instance.name)
9579 for (idx, device, offset) in disks:
9580 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9581 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9583 int(min(constants.MAX_WIPE_CHUNK,
9584 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9588 start_time = time.time()
9593 info_text = (" (from %s to %s)" %
9594 (utils.FormatUnit(offset, "h"),
9595 utils.FormatUnit(size, "h")))
9597 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9599 logging.info("Wiping disk %d for instance %s on node %s using"
9600 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9602 while offset < size:
9603 wipe_size = min(wipe_chunk_size, size - offset)
9605 logging.debug("Wiping disk %d, offset %s, chunk %s",
9606 idx, offset, wipe_size)
9608 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9610 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9611 (idx, offset, wipe_size))
9615 if now - last_output >= 60:
9616 eta = _CalcEta(now - start_time, offset, size)
9617 lu.LogInfo(" - done: %.1f%% ETA: %s",
9618 offset / float(size) * 100, utils.FormatSeconds(eta))
9621 logging.info("Resuming synchronization of disks for instance '%s'",
9624 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9625 (map(compat.snd, disks),
9630 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9631 node, result.fail_msg)
9633 for idx, success in enumerate(result.payload):
9635 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9636 " failed", idx, instance.name)
9639 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9640 """Create all disks for an instance.
9642 This abstracts away some work from AddInstance.
9644 @type lu: L{LogicalUnit}
9645 @param lu: the logical unit on whose behalf we execute
9646 @type instance: L{objects.Instance}
9647 @param instance: the instance whose disks we should create
9649 @param to_skip: list of indices to skip
9650 @type target_node: string
9651 @param target_node: if passed, overrides the target node for creation
9653 @return: the success of the creation
9656 info = _GetInstanceInfoText(instance)
9657 if target_node is None:
9658 pnode = instance.primary_node
9659 all_nodes = instance.all_nodes
9664 if instance.disk_template in constants.DTS_FILEBASED:
9665 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9666 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9668 result.Raise("Failed to create directory '%s' on"
9669 " node %s" % (file_storage_dir, pnode))
9671 # Note: this needs to be kept in sync with adding of disks in
9672 # LUInstanceSetParams
9673 for idx, device in enumerate(instance.disks):
9674 if to_skip and idx in to_skip:
9676 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9678 for node in all_nodes:
9679 f_create = node == pnode
9680 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9683 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9684 """Remove all disks for an instance.
9686 This abstracts away some work from `AddInstance()` and
9687 `RemoveInstance()`. Note that in case some of the devices couldn't
9688 be removed, the removal will continue with the other ones (compare
9689 with `_CreateDisks()`).
9691 @type lu: L{LogicalUnit}
9692 @param lu: the logical unit on whose behalf we execute
9693 @type instance: L{objects.Instance}
9694 @param instance: the instance whose disks we should remove
9695 @type target_node: string
9696 @param target_node: used to override the node on which to remove the disks
9698 @return: the success of the removal
9701 logging.info("Removing block devices for instance %s", instance.name)
9704 ports_to_release = set()
9705 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9706 for (idx, device) in enumerate(anno_disks):
9708 edata = [(target_node, device)]
9710 edata = device.ComputeNodeTree(instance.primary_node)
9711 for node, disk in edata:
9712 lu.cfg.SetDiskID(disk, node)
9713 result = lu.rpc.call_blockdev_remove(node, disk)
9715 lu.LogWarning("Could not remove disk %s on node %s,"
9716 " continuing anyway: %s", idx, node, result.fail_msg)
9717 if not (result.offline and node != instance.primary_node):
9720 # if this is a DRBD disk, return its port to the pool
9721 if device.dev_type in constants.LDS_DRBD:
9722 ports_to_release.add(device.logical_id[2])
9724 if all_result or ignore_failures:
9725 for port in ports_to_release:
9726 lu.cfg.AddTcpUdpPort(port)
9728 if instance.disk_template in constants.DTS_FILEBASED:
9729 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9733 tgt = instance.primary_node
9734 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9736 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9737 file_storage_dir, instance.primary_node, result.fail_msg)
9743 def _ComputeDiskSizePerVG(disk_template, disks):
9744 """Compute disk size requirements in the volume group
9747 def _compute(disks, payload):
9748 """Universal algorithm.
9753 vgs[disk[constants.IDISK_VG]] = \
9754 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9758 # Required free disk space as a function of disk and swap space
9760 constants.DT_DISKLESS: {},
9761 constants.DT_PLAIN: _compute(disks, 0),
9762 # 128 MB are added for drbd metadata for each disk
9763 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9764 constants.DT_FILE: {},
9765 constants.DT_SHARED_FILE: {},
9768 if disk_template not in req_size_dict:
9769 raise errors.ProgrammerError("Disk template '%s' size requirement"
9770 " is unknown" % disk_template)
9772 return req_size_dict[disk_template]
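# Result sketch (hypothetical input): for two disks of 1024 and 2048 MiB in
# volume group "xenvg", this returns {"xenvg": 3072} for DT_PLAIN,
# {"xenvg": 3072 + 2 * constants.DRBD_META_SIZE} for DT_DRBD8 (one metadata
# allowance per disk), and {} for the diskless and file-based templates.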
9775 def _FilterVmNodes(lu, nodenames):
9776 """Filters out non-vm_capable nodes from a list.
9778 @type lu: L{LogicalUnit}
9779 @param lu: the logical unit for which we check
9780 @type nodenames: list
9781 @param nodenames: the list of nodes on which we should check
9783 @return: the list of vm-capable nodes
9786 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9787 return [name for name in nodenames if name not in non_vm_nodes]
9790 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9791 """Hypervisor parameter validation.
9793 This function abstracts the hypervisor parameter validation to be
9794 used in both instance create and instance modify.
9796 @type lu: L{LogicalUnit}
9797 @param lu: the logical unit for which we check
9798 @type nodenames: list
9799 @param nodenames: the list of nodes on which we should check
9800 @type hvname: string
9801 @param hvname: the name of the hypervisor we should use
9802 @type hvparams: dict
9803 @param hvparams: the parameters which we need to check
9804 @raise errors.OpPrereqError: if the parameters are not valid
9807 nodenames = _FilterVmNodes(lu, nodenames)
9809 cluster = lu.cfg.GetClusterInfo()
9810 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9812 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9813 for node in nodenames:
9817 info.Raise("Hypervisor parameter validation failed on node %s" % node)
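# Merge sketch (hypothetical values): with cluster-level hvparams
# {"kernel_path": "/boot/vmlinuz"} for hvname and instance-level hvparams
# {"kernel_args": "ro"}, hvfull becomes {"kernel_path": "/boot/vmlinuz",
# "kernel_args": "ro"}, and that merged dict is validated on each remaining
# (vm_capable) node via the RPC above.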
9820 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9821 """OS parameters validation.
9823 @type lu: L{LogicalUnit}
9824 @param lu: the logical unit for which we check
9825 @type required: boolean
9826 @param required: whether the validation should fail if the OS is not found
9828 @type nodenames: list
9829 @param nodenames: the list of nodes on which we should check
9830 @type osname: string
9831 @param osname: the name of the OS we should use
9832 @type osparams: dict
9833 @param osparams: the parameters which we need to check
9834 @raise errors.OpPrereqError: if the parameters are not valid
9837 nodenames = _FilterVmNodes(lu, nodenames)
9838 result = lu.rpc.call_os_validate(nodenames, required, osname,
9839 [constants.OS_VALIDATE_PARAMETERS],
9841 for node, nres in result.items():
9842 # we don't check for offline cases since this should be run only
9843 # against the master node and/or an instance's nodes
9844 nres.Raise("OS Parameters validation failed on node %s" % node)
9845 if not nres.payload:
9846 lu.LogInfo("OS %s not found on node %s, validation skipped",
9850 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9851 """Wrapper around IAReqInstanceAlloc.
9853 @param op: The instance opcode
9854 @param disks: The computed disks
9855 @param nics: The computed nics
9856 @param beparams: The fully filled beparams
9857 @param node_whitelist: List of nodes which should appear as online to the
9858 allocator (unless the node is already marked offline)
9860 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9863 spindle_use = beparams[constants.BE_SPINDLE_USE]
9864 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9865 disk_template=op.disk_template,
9868 vcpus=beparams[constants.BE_VCPUS],
9869 memory=beparams[constants.BE_MAXMEM],
9870 spindle_use=spindle_use,
9872 nics=[n.ToDict() for n in nics],
9873 hypervisor=op.hypervisor,
9874 node_whitelist=node_whitelist)
9877 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9878 """Computes the nics.
9880 @param op: The instance opcode
9881 @param cluster: Cluster configuration object
9882 @param default_ip: The default ip to assign
9883 @param cfg: An instance of the configuration object
9884 @param ec_id: Execution context ID
9886 @returns: The built-up nics
9891 nic_mode_req = nic.get(constants.INIC_MODE, None)
9892 nic_mode = nic_mode_req
9893 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9894 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9896 net = nic.get(constants.INIC_NETWORK, None)
9897 link = nic.get(constants.NIC_LINK, None)
9898 ip = nic.get(constants.INIC_IP, None)
9900 if net is None or net.lower() == constants.VALUE_NONE:
9903 if nic_mode_req is not None or link is not None:
9904 raise errors.OpPrereqError("If network is given, no mode or link"
9905 " is allowed to be passed",
9908 # ip validity checks
9909 if ip is None or ip.lower() == constants.VALUE_NONE:
9911 elif ip.lower() == constants.VALUE_AUTO:
9912 if not op.name_check:
9913 raise errors.OpPrereqError("IP address set to auto but name checks"
9914 " have been skipped",
9918 # We defer pool operations until later, so that the iallocator has
9919 # filled in the instance's node(s)
9920 if ip.lower() == constants.NIC_IP_POOL:
9922 raise errors.OpPrereqError("if ip=pool, parameter network"
9923 " must be passed too",
9926 elif not netutils.IPAddress.IsValid(ip):
9927 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9932 # TODO: check the ip address for uniqueness
9933 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9934 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9937 # MAC address verification
9938 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9939 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9940 mac = utils.NormalizeAndValidateMac(mac)
9943 # TODO: We need to factor this out
9944 cfg.ReserveMAC(mac, ec_id)
9945 except errors.ReservationError:
9946 raise errors.OpPrereqError("MAC address %s already in use"
9947 " in cluster" % mac,
9948 errors.ECODE_NOTUNIQUE)
9950 # Build nic parameters
9953 nicparams[constants.NIC_MODE] = nic_mode
9955 nicparams[constants.NIC_LINK] = link
9957 check_params = cluster.SimpleFillNIC(nicparams)
9958 objects.NIC.CheckParameterSyntax(check_params)
9959 net_uuid = cfg.LookupNetwork(net)
9960 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9961 network=net_uuid, nicparams=nicparams))
9966 def _ComputeDisks(op, default_vg):
9967 """Computes the instance disks.
9969 @param op: The instance opcode
9970 @param default_vg: The default_vg to assume
9972 @return: The computed disks
9976 for disk in op.disks:
9977 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9978 if mode not in constants.DISK_ACCESS_SET:
9979 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9980 mode, errors.ECODE_INVAL)
9981 size = disk.get(constants.IDISK_SIZE, None)
9983 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9986 except (TypeError, ValueError):
9987 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9990 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9991 if ext_provider and op.disk_template != constants.DT_EXT:
9992 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9993 " disk template, not %s" %
9994 (constants.IDISK_PROVIDER, constants.DT_EXT,
9995 op.disk_template), errors.ECODE_INVAL)
9997 data_vg = disk.get(constants.IDISK_VG, default_vg)
9999 constants.IDISK_SIZE: size,
10000 constants.IDISK_MODE: mode,
10001 constants.IDISK_VG: data_vg,
10004 if constants.IDISK_METAVG in disk:
10005 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10006 if constants.IDISK_ADOPT in disk:
10007 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10009 # For extstorage, demand the `provider' option and add any
10010 # additional parameters (ext-params) to the dict
10011 if op.disk_template == constants.DT_EXT:
10013 new_disk[constants.IDISK_PROVIDER] = ext_provider
10015 if key not in constants.IDISK_PARAMS:
10016 new_disk[key] = disk[key]
10018 raise errors.OpPrereqError("Missing provider for template '%s'" %
10019 constants.DT_EXT, errors.ECODE_INVAL)
10021 disks.append(new_disk)
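# Shape of one computed disk (hypothetical values): a 10 GiB read-write LVM
# disk request becomes {IDISK_SIZE: 10240, IDISK_MODE: DISK_RDWR,
# IDISK_VG: "xenvg"}, with IDISK_METAVG/IDISK_ADOPT (and, for DT_EXT,
# IDISK_PROVIDER plus any ext-params) added only when given in the opcode.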
10026 def _ComputeFullBeParams(op, cluster):
10027 """Computes the full beparams.
10029 @param op: The instance opcode
10030 @param cluster: The cluster config object
10032 @return: The fully filled beparams
10035 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10036 for param, value in op.beparams.iteritems():
10037 if value == constants.VALUE_AUTO:
10038 op.beparams[param] = default_beparams[param]
10039 objects.UpgradeBeParams(op.beparams)
10040 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10041 return cluster.SimpleFillBE(op.beparams)
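# Fill-order sketch (hypothetical values): op.beparams = {BE_VCPUS: "auto"}
# first has "auto" replaced by the cluster default, the dict is then upgraded
# (legacy memory -> minmem/maxmem) and type-checked, and SimpleFillBE() layers
# the remaining cluster defaults under the instance-specific values.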
10044 def _CheckOpportunisticLocking(op):
10045 """Generate error if opportunistic locking is not possible.
10048 if op.opportunistic_locking and not op.iallocator:
10049 raise errors.OpPrereqError("Opportunistic locking is only available in"
10050 " combination with an instance allocator",
10051 errors.ECODE_INVAL)
10054 class LUInstanceCreate(LogicalUnit):
10055 """Create an instance.
10058 HPATH = "instance-add"
10059 HTYPE = constants.HTYPE_INSTANCE
10062 def CheckArguments(self):
10063 """Check arguments.
10066 # do not require name_check to ease forward/backward compatibility
10068 if self.op.no_install and self.op.start:
10069 self.LogInfo("No-installation mode selected, disabling startup")
10070 self.op.start = False
10071 # validate/normalize the instance name
10072 self.op.instance_name = \
10073 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10075 if self.op.ip_check and not self.op.name_check:
10076 # TODO: make the ip check more flexible and not depend on the name check
10077 raise errors.OpPrereqError("Cannot do IP address check without a name"
10078 " check", errors.ECODE_INVAL)
10080 # check nics' parameter names
10081 for nic in self.op.nics:
10082 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10084 # check disks: parameter names and consistent adopt/no-adopt strategy
10085 has_adopt = has_no_adopt = False
10086 for disk in self.op.disks:
10087 if self.op.disk_template != constants.DT_EXT:
10088 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10089 if constants.IDISK_ADOPT in disk:
10092 has_no_adopt = True
10093 if has_adopt and has_no_adopt:
10094 raise errors.OpPrereqError("Either all disks are adopted or none is",
10095 errors.ECODE_INVAL)
10097 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10098 raise errors.OpPrereqError("Disk adoption is not supported for the"
10099 " '%s' disk template" %
10100 self.op.disk_template,
10101 errors.ECODE_INVAL)
10102 if self.op.iallocator is not None:
10103 raise errors.OpPrereqError("Disk adoption not allowed with an"
10104 " iallocator script", errors.ECODE_INVAL)
10105 if self.op.mode == constants.INSTANCE_IMPORT:
10106 raise errors.OpPrereqError("Disk adoption not allowed for"
10107 " instance import", errors.ECODE_INVAL)
10109 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10110 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10111 " but no 'adopt' parameter given" %
10112 self.op.disk_template,
10113 errors.ECODE_INVAL)
10115 self.adopt_disks = has_adopt
10117 # instance name verification
10118 if self.op.name_check:
10119 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10120 self.op.instance_name = self.hostname1.name
10121 # used in CheckPrereq for ip ping check
10122 self.check_ip = self.hostname1.ip
10124 self.check_ip = None
10126 # file storage checks
10127 if (self.op.file_driver and
10128 self.op.file_driver not in constants.FILE_DRIVER):
10129 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10130 self.op.file_driver, errors.ECODE_INVAL)
10132 if self.op.disk_template == constants.DT_FILE:
10133 opcodes.RequireFileStorage()
10134 elif self.op.disk_template == constants.DT_SHARED_FILE:
10135 opcodes.RequireSharedFileStorage()
10137 ### Node/iallocator related checks
10138 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10140 if self.op.pnode is not None:
10141 if self.op.disk_template in constants.DTS_INT_MIRROR:
10142 if self.op.snode is None:
10143 raise errors.OpPrereqError("The networked disk templates need"
10144 " a mirror node", errors.ECODE_INVAL)
10145 elif self.op.snode:
10146 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10148 self.op.snode = None
10150 _CheckOpportunisticLocking(self.op)
10152 self._cds = _GetClusterDomainSecret()
10154 if self.op.mode == constants.INSTANCE_IMPORT:
10155 # On import force_variant must be True, because if we forced it at
10156 # initial install, our only chance when importing it back is that it
10158 self.op.force_variant = True
10160 if self.op.no_install:
10161 self.LogInfo("No-installation mode has no effect during import")
10163 elif self.op.mode == constants.INSTANCE_CREATE:
10164 if self.op.os_type is None:
10165 raise errors.OpPrereqError("No guest OS specified",
10166 errors.ECODE_INVAL)
10167 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10168 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10169 " installation" % self.op.os_type,
10170 errors.ECODE_STATE)
10171 if self.op.disk_template is None:
10172 raise errors.OpPrereqError("No disk template specified",
10173 errors.ECODE_INVAL)
10175 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10176 # Check handshake to ensure both clusters have the same domain secret
10177 src_handshake = self.op.source_handshake
10178 if not src_handshake:
10179 raise errors.OpPrereqError("Missing source handshake",
10180 errors.ECODE_INVAL)
10182 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10185 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10186 errors.ECODE_INVAL)
10188 # Load and check source CA
10189 self.source_x509_ca_pem = self.op.source_x509_ca
10190 if not self.source_x509_ca_pem:
10191 raise errors.OpPrereqError("Missing source X509 CA",
10192 errors.ECODE_INVAL)
10195 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10197 except OpenSSL.crypto.Error, err:
10198 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10199 (err, ), errors.ECODE_INVAL)
10201 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10202 if errcode is not None:
10203 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10204 errors.ECODE_INVAL)
10206 self.source_x509_ca = cert
10208 src_instance_name = self.op.source_instance_name
10209 if not src_instance_name:
10210 raise errors.OpPrereqError("Missing source instance name",
10211 errors.ECODE_INVAL)
10213 self.source_instance_name = \
10214 netutils.GetHostname(name=src_instance_name).name
10217 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10218 self.op.mode, errors.ECODE_INVAL)
10220 def ExpandNames(self):
10221 """ExpandNames for CreateInstance.
10223 Figure out the right locks for instance creation.
10226 self.needed_locks = {}
10228 instance_name = self.op.instance_name
10229 # this is just a preventive check, but someone might still add this
10230 # instance in the meantime, and creation will fail at lock-add time
10231 if instance_name in self.cfg.GetInstanceList():
10232 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10233 instance_name, errors.ECODE_EXISTS)
10235 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10237 if self.op.iallocator:
10238 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10239 # specifying a group on instance creation and then selecting nodes from that group
10241 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10242 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10244 if self.op.opportunistic_locking:
10245 self.opportunistic_locks[locking.LEVEL_NODE] = True
10246 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10248 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10249 nodelist = [self.op.pnode]
10250 if self.op.snode is not None:
10251 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10252 nodelist.append(self.op.snode)
10253 self.needed_locks[locking.LEVEL_NODE] = nodelist
10255 # in case of import lock the source node too
10256 if self.op.mode == constants.INSTANCE_IMPORT:
10257 src_node = self.op.src_node
10258 src_path = self.op.src_path
10260 if src_path is None:
10261 self.op.src_path = src_path = self.op.instance_name
10263 if src_node is None:
10264 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10265 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10266 self.op.src_node = None
10267 if os.path.isabs(src_path):
10268 raise errors.OpPrereqError("Importing an instance from a path"
10269 " requires a source node option",
10270 errors.ECODE_INVAL)
10272 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10273 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10274 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10275 if not os.path.isabs(src_path):
10276 self.op.src_path = src_path = \
10277 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10279 self.needed_locks[locking.LEVEL_NODE_RES] = \
10280 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10282 def _RunAllocator(self):
10283 """Run the allocator based on input opcode.
10286 if self.op.opportunistic_locking:
10287 # Only consider nodes for which a lock is held
10288 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10290 node_whitelist = None
10292 #TODO Export network to iallocator so that it chooses a pnode
10293 # in a nodegroup that has the desired network connected to it
10294 req = _CreateInstanceAllocRequest(self.op, self.disks,
10295 self.nics, self.be_full,
10297 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10299 ial.Run(self.op.iallocator)
10301 if not ial.success:
10302 # When opportunistic locks are used only a temporary failure is generated
10303 if self.op.opportunistic_locking:
10304 ecode = errors.ECODE_TEMP_NORES
10306 ecode = errors.ECODE_NORES
10308 raise errors.OpPrereqError("Can't compute nodes using"
10309 " iallocator '%s': %s" %
10310 (self.op.iallocator, ial.info),
10313 self.op.pnode = ial.result[0]
10314 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10315 self.op.instance_name, self.op.iallocator,
10316 utils.CommaJoin(ial.result))
10318 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10320 if req.RequiredNodes() == 2:
10321 self.op.snode = ial.result[1]
10323 def BuildHooksEnv(self):
10324 """Build hooks env.
10326 This runs on master, primary and secondary nodes of the instance.
10330 "ADD_MODE": self.op.mode,
10332 if self.op.mode == constants.INSTANCE_IMPORT:
10333 env["SRC_NODE"] = self.op.src_node
10334 env["SRC_PATH"] = self.op.src_path
10335 env["SRC_IMAGES"] = self.src_images
10337 env.update(_BuildInstanceHookEnv(
10338 name=self.op.instance_name,
10339 primary_node=self.op.pnode,
10340 secondary_nodes=self.secondaries,
10341 status=self.op.start,
10342 os_type=self.op.os_type,
10343 minmem=self.be_full[constants.BE_MINMEM],
10344 maxmem=self.be_full[constants.BE_MAXMEM],
10345 vcpus=self.be_full[constants.BE_VCPUS],
10346 nics=_NICListToTuple(self, self.nics),
10347 disk_template=self.op.disk_template,
10348 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10349 for d in self.disks],
10352 hypervisor_name=self.op.hypervisor,
10358 def BuildHooksNodes(self):
10359 """Build hooks nodes.
10362 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10365 def _ReadExportInfo(self):
10366 """Reads the export information from disk.
10368 It will override the opcode source node and path with the actual
10369 information, if these two were not specified before.
10371 @return: the export information
10374 assert self.op.mode == constants.INSTANCE_IMPORT
10376 src_node = self.op.src_node
10377 src_path = self.op.src_path
10379 if src_node is None:
10380 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10381 exp_list = self.rpc.call_export_list(locked_nodes)
10383 for node in exp_list:
10384 if exp_list[node].fail_msg:
10386 if src_path in exp_list[node].payload:
10388 self.op.src_node = src_node = node
10389 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10393 raise errors.OpPrereqError("No export found for relative path %s" %
10394 src_path, errors.ECODE_INVAL)
10396 _CheckNodeOnline(self, src_node)
10397 result = self.rpc.call_export_info(src_node, src_path)
10398 result.Raise("No export or invalid export found in dir %s" % src_path)
10400 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10401 if not export_info.has_section(constants.INISECT_EXP):
10402 raise errors.ProgrammerError("Corrupted export config",
10403 errors.ECODE_ENVIRON)
10405 ei_version = export_info.get(constants.INISECT_EXP, "version")
10406 if (int(ei_version) != constants.EXPORT_VERSION):
10407 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10408 (ei_version, constants.EXPORT_VERSION),
10409 errors.ECODE_ENVIRON)
10412 def _ReadExportParams(self, einfo):
10413 """Use export parameters as defaults.
10415 If the opcode doesn't specify (i.e. override) some instance
10416 parameters, try to use them from the export information, if
10417 it declares them.
10420 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10422 if self.op.disk_template is None:
10423 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10424 self.op.disk_template = einfo.get(constants.INISECT_INS,
10426 if self.op.disk_template not in constants.DISK_TEMPLATES:
10427 raise errors.OpPrereqError("Disk template specified in configuration"
10428 " file is not one of the allowed values:"
10430 " ".join(constants.DISK_TEMPLATES),
10431 errors.ECODE_INVAL)
10433 raise errors.OpPrereqError("No disk template specified and the export"
10434 " is missing the disk_template information",
10435 errors.ECODE_INVAL)
10437 if not self.op.disks:
10439 # TODO: import the disk iv_name too
10440 for idx in range(constants.MAX_DISKS):
10441 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10442 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10443 disks.append({constants.IDISK_SIZE: disk_sz})
10444 self.op.disks = disks
10445 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10446 raise errors.OpPrereqError("No disk info specified and the export"
10447 " is missing the disk information",
10448 errors.ECODE_INVAL)
10450 if not self.op.nics:
10452 for idx in range(constants.MAX_NICS):
10453 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10455 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10456 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10461 self.op.nics = nics
10463 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10464 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10466 if (self.op.hypervisor is None and
10467 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10468 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10470 if einfo.has_section(constants.INISECT_HYP):
10471 # use the export parameters but do not override the ones
10472 # specified by the user
10473 for name, value in einfo.items(constants.INISECT_HYP):
10474 if name not in self.op.hvparams:
10475 self.op.hvparams[name] = value
10477 if einfo.has_section(constants.INISECT_BEP):
10478 # use the parameters, without overriding
10479 for name, value in einfo.items(constants.INISECT_BEP):
10480 if name not in self.op.beparams:
10481 self.op.beparams[name] = value
10482 # Compatibility for the old "memory" be param
10483 if name == constants.BE_MEMORY:
10484 if constants.BE_MAXMEM not in self.op.beparams:
10485 self.op.beparams[constants.BE_MAXMEM] = value
10486 if constants.BE_MINMEM not in self.op.beparams:
10487 self.op.beparams[constants.BE_MINMEM] = value
10489 # try to read the parameters old style, from the main section
10490 for name in constants.BES_PARAMETERS:
10491 if (name not in self.op.beparams and
10492 einfo.has_option(constants.INISECT_INS, name)):
10493 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10495 if einfo.has_section(constants.INISECT_OSP):
10496 # use the parameters, without overriding
10497 for name, value in einfo.items(constants.INISECT_OSP):
10498 if name not in self.op.osparams:
10499 self.op.osparams[name] = value
10501 def _RevertToDefaults(self, cluster):
10502 """Revert the instance parameters to the default values.
10506 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10507 for name in self.op.hvparams.keys():
10508 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10509 del self.op.hvparams[name]
10511 be_defs = cluster.SimpleFillBE({})
10512 for name in self.op.beparams.keys():
10513 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10514 del self.op.beparams[name]
10516 nic_defs = cluster.SimpleFillNIC({})
10517 for nic in self.op.nics:
10518 for name in constants.NICS_PARAMETERS:
10519 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10522 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10523 for name in self.op.osparams.keys():
10524 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10525 del self.op.osparams[name]
10527 def _CalculateFileStorageDir(self):
10528 """Calculate final instance file storage dir.
10531 # file storage dir calculation/check
10532 self.instance_file_storage_dir = None
10533 if self.op.disk_template in constants.DTS_FILEBASED:
10534 # build the full file storage dir path
10537 if self.op.disk_template == constants.DT_SHARED_FILE:
10538 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10540 get_fsd_fn = self.cfg.GetFileStorageDir
10542 cfg_storagedir = get_fsd_fn()
10543 if not cfg_storagedir:
10544 raise errors.OpPrereqError("Cluster file storage dir not defined",
10545 errors.ECODE_STATE)
10546 joinargs.append(cfg_storagedir)
10548 if self.op.file_storage_dir is not None:
10549 joinargs.append(self.op.file_storage_dir)
10551 joinargs.append(self.op.instance_name)
10553 # pylint: disable=W0142
10554 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
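# Path sketch (hypothetical values): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir = "websrv" and instance
# name "inst1.example.com", the resulting dir is
# "/srv/ganeti/file-storage/websrv/inst1.example.com".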
10556 def CheckPrereq(self): # pylint: disable=R0914
10557 """Check prerequisites.
10560 self._CalculateFileStorageDir()
10562 if self.op.mode == constants.INSTANCE_IMPORT:
10563 export_info = self._ReadExportInfo()
10564 self._ReadExportParams(export_info)
10565 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10567 self._old_instance_name = None
10569 if (not self.cfg.GetVGName() and
10570 self.op.disk_template not in constants.DTS_NOT_LVM):
10571 raise errors.OpPrereqError("Cluster does not support lvm-based"
10572 " instances", errors.ECODE_STATE)
10574 if (self.op.hypervisor is None or
10575 self.op.hypervisor == constants.VALUE_AUTO):
10576 self.op.hypervisor = self.cfg.GetHypervisorType()
10578 cluster = self.cfg.GetClusterInfo()
10579 enabled_hvs = cluster.enabled_hypervisors
10580 if self.op.hypervisor not in enabled_hvs:
10581 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10583 (self.op.hypervisor, ",".join(enabled_hvs)),
10584 errors.ECODE_STATE)
10586 # Check tag validity
10587 for tag in self.op.tags:
10588 objects.TaggableObject.ValidateTag(tag)
10590 # check hypervisor parameter syntax (locally)
10591 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10592 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10594 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10595 hv_type.CheckParameterSyntax(filled_hvp)
10596 self.hv_full = filled_hvp
10597 # check that we don't specify global parameters on an instance
10598 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10599 "instance", "cluster")
10601 # fill and remember the beparams dict
10602 self.be_full = _ComputeFullBeParams(self.op, cluster)
10604 # build os parameters
10605 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10607 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
10609 if self.op.identify_defaults:
10610 self._RevertToDefaults(cluster)
10613 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10614 self.proc.GetECId())
10616 # disk checks/pre-build
10617 default_vg = self.cfg.GetVGName()
10618 self.disks = _ComputeDisks(self.op, default_vg)
10620 if self.op.mode == constants.INSTANCE_IMPORT:
10622 for idx in range(len(self.disks)):
10623 option = "disk%d_dump" % idx
10624 if export_info.has_option(constants.INISECT_INS, option):
10625 # FIXME: are the old os-es, disk sizes, etc. useful?
10626 export_name = export_info.get(constants.INISECT_INS, option)
10627 image = utils.PathJoin(self.op.src_path, export_name)
10628 disk_images.append(image)
10630 disk_images.append(False)
10632 self.src_images = disk_images
10634 if self.op.instance_name == self._old_instance_name:
10635 for idx, nic in enumerate(self.nics):
10636 if nic.mac == constants.VALUE_AUTO:
10637 nic_mac_ini = "nic%d_mac" % idx
10638 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10640 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10642 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10643 if self.op.ip_check:
10644 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10645 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10646 (self.check_ip, self.op.instance_name),
10647 errors.ECODE_NOTUNIQUE)
10649 #### mac address generation
10650 # By generating here the mac address both the allocator and the hooks get
10651 # the real final mac address rather than the 'auto' or 'generate' value.
10652 # There is a race condition between the generation and the instance object
10653 # creation, which means that we know the mac is valid now, but we're not
10654 # sure it will be when we actually add the instance. If things go bad
10655 # adding the instance will abort because of a duplicate mac, and the
10656 # creation job will fail.
10657 for nic in self.nics:
10658 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10659 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10663 if self.op.iallocator is not None:
10664 self._RunAllocator()
10666 # Release all unneeded node locks
10667 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10668 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10669 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10670 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10672 assert (self.owned_locks(locking.LEVEL_NODE) ==
10673 self.owned_locks(locking.LEVEL_NODE_RES)), \
10674 "Node locks differ from node resource locks"
10676 #### node related checks
10678 # check primary node
10679 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10680 assert self.pnode is not None, \
10681 "Cannot retrieve locked node %s" % self.op.pnode
10683 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10684 pnode.name, errors.ECODE_STATE)
10686 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10687 pnode.name, errors.ECODE_STATE)
10688 if not pnode.vm_capable:
10689 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10690 " '%s'" % pnode.name, errors.ECODE_STATE)
10692 self.secondaries = []
10694 # Fill in any IPs from IP pools. This must happen here, because we need to
10695 # know the nic's primary node, as specified by the iallocator
10696 for idx, nic in enumerate(self.nics):
10697 net_uuid = nic.network
10698 if net_uuid is not None:
10699 nobj = self.cfg.GetNetwork(net_uuid)
10700 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10701 if netparams is None:
10702 raise errors.OpPrereqError("No netparams found for network"
10703 " %s. Propably not connected to"
10704 " node's %s nodegroup" %
10705 (nobj.name, self.pnode.name),
10706 errors.ECODE_INVAL)
10707 self.LogInfo("NIC/%d inherits netparams %s" %
10708 (idx, netparams.values()))
10709 nic.nicparams = dict(netparams)
10710 if nic.ip is not None:
10711 if nic.ip.lower() == constants.NIC_IP_POOL:
10713 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10714 except errors.ReservationError:
10715 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10716 " from the address pool" % idx,
10717 errors.ECODE_STATE)
10718 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10721 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10722 except errors.ReservationError:
10723 raise errors.OpPrereqError("IP address %s already in use"
10724 " or does not belong to network %s" %
10725 (nic.ip, nobj.name),
10726 errors.ECODE_NOTUNIQUE)
10728 # net is None, ip None or given
10729 elif self.op.conflicts_check:
10730 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10732 # mirror node verification
10733 if self.op.disk_template in constants.DTS_INT_MIRROR:
10734 if self.op.snode == pnode.name:
10735 raise errors.OpPrereqError("The secondary node cannot be the"
10736 " primary node", errors.ECODE_INVAL)
10737 _CheckNodeOnline(self, self.op.snode)
10738 _CheckNodeNotDrained(self, self.op.snode)
10739 _CheckNodeVmCapable(self, self.op.snode)
10740 self.secondaries.append(self.op.snode)
10742 snode = self.cfg.GetNodeInfo(self.op.snode)
10743 if pnode.group != snode.group:
10744 self.LogWarning("The primary and secondary nodes are in two"
10745 " different node groups; the disk parameters"
10746 " from the first disk's node group will be"
10749 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
10751 if self.op.disk_template in constants.DTS_INT_MIRROR:
10752 nodes.append(snode)
10753 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10754 if compat.any(map(has_es, nodes)):
10755 raise errors.OpPrereqError("Disk template %s not supported with"
10756 " exclusive storage" % self.op.disk_template,
10757 errors.ECODE_STATE)
10759 nodenames = [pnode.name] + self.secondaries
10761 # Verify instance specs
10762 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10764 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10765 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10766 constants.ISPEC_DISK_COUNT: len(self.disks),
10767 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10768 constants.ISPEC_NIC_COUNT: len(self.nics),
10769 constants.ISPEC_SPINDLE_USE: spindle_use,
10772 group_info = self.cfg.GetNodeGroup(pnode.group)
10773 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10774 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10775 if not self.op.ignore_ipolicy and res:
10776 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10777 (pnode.group, group_info.name, utils.CommaJoin(res)))
10778 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10780 if not self.adopt_disks:
10781 if self.op.disk_template == constants.DT_RBD:
10782 # _CheckRADOSFreeSpace() is just a placeholder.
10783 # Any function that checks prerequisites can be placed here.
10784 # Check if there is enough space on the RADOS cluster.
10785 _CheckRADOSFreeSpace()
10786 elif self.op.disk_template == constants.DT_EXT:
10787 # FIXME: Function that checks prereqs if needed
10790 # Check lv size requirements, if not adopting
10791 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10792 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10794 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10795 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10796 disk[constants.IDISK_ADOPT])
10797 for disk in self.disks])
10798 if len(all_lvs) != len(self.disks):
10799 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10800 errors.ECODE_INVAL)
10801 for lv_name in all_lvs:
10803 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10804 # to ReserveLV use the same syntax
10805 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10806 except errors.ReservationError:
10807 raise errors.OpPrereqError("LV named %s used by another instance" %
10808 lv_name, errors.ECODE_NOTUNIQUE)
10810 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10811 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10813 node_lvs = self.rpc.call_lv_list([pnode.name],
10814 vg_names.payload.keys())[pnode.name]
10815 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10816 node_lvs = node_lvs.payload
10818 delta = all_lvs.difference(node_lvs.keys())
10820 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10821 utils.CommaJoin(delta),
10822 errors.ECODE_INVAL)
10823 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10825 raise errors.OpPrereqError("Online logical volumes found, cannot"
10826 " adopt: %s" % utils.CommaJoin(online_lvs),
10827 errors.ECODE_STATE)
10828 # update the size of disk based on what is found
10829 for dsk in self.disks:
10830 dsk[constants.IDISK_SIZE] = \
10831 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10832 dsk[constants.IDISK_ADOPT])][0]))
10834 elif self.op.disk_template == constants.DT_BLOCK:
10835 # Normalize and de-duplicate device paths
10836 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10837 for disk in self.disks])
10838 if len(all_disks) != len(self.disks):
10839 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10840 errors.ECODE_INVAL)
10841 baddisks = [d for d in all_disks
10842 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10844 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10845 " cannot be adopted" %
10846 (utils.CommaJoin(baddisks),
10847 constants.ADOPTABLE_BLOCKDEV_ROOT),
10848 errors.ECODE_INVAL)
10850 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10851 list(all_disks))[pnode.name]
10852 node_disks.Raise("Cannot get block device information from node %s" %
10854 node_disks = node_disks.payload
10855 delta = all_disks.difference(node_disks.keys())
10857 raise errors.OpPrereqError("Missing block device(s): %s" %
10858 utils.CommaJoin(delta),
10859 errors.ECODE_INVAL)
10860 for dsk in self.disks:
10861 dsk[constants.IDISK_SIZE] = \
10862 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10864 # Verify instance specs
10865 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10867 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10868 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10869 constants.ISPEC_DISK_COUNT: len(self.disks),
10870 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10871 for disk in self.disks],
10872 constants.ISPEC_NIC_COUNT: len(self.nics),
10873 constants.ISPEC_SPINDLE_USE: spindle_use,
10876 group_info = self.cfg.GetNodeGroup(pnode.group)
10877 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10878 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10879 if not self.op.ignore_ipolicy and res:
10880 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10881 " policy: %s") % (pnode.group,
10882 utils.CommaJoin(res)),
10883 errors.ECODE_INVAL)
10885 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10887 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10888 # check OS parameters (remotely)
10889 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10891 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10893 #TODO: _CheckExtParams (remotely)
10894 # Check parameters for extstorage
10896 # memory check on primary node
10897 #TODO(dynmem): use MINMEM for checking
10899 _CheckNodeFreeMemory(self, self.pnode.name,
10900 "creating instance %s" % self.op.instance_name,
10901 self.be_full[constants.BE_MAXMEM],
10902 self.op.hypervisor)
10904 self.dry_run_result = list(nodenames)
10906 def Exec(self, feedback_fn):
10907 """Create and add the instance to the cluster.
10910 instance = self.op.instance_name
10911 pnode_name = self.pnode.name
10913 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10914 self.owned_locks(locking.LEVEL_NODE)), \
10915 "Node locks differ from node resource locks"
10916 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10918 ht_kind = self.op.hypervisor
10919 if ht_kind in constants.HTS_REQ_PORT:
10920 network_port = self.cfg.AllocatePort()
10922 network_port = None
10924 # This is ugly, but we have a chicken-and-egg problem here
10925 # We can only take the group disk parameters, as the instance
10926 # has no disks yet (we are generating them right here).
10927 node = self.cfg.GetNodeInfo(pnode_name)
10928 nodegroup = self.cfg.GetNodeGroup(node.group)
10929 disks = _GenerateDiskTemplate(self,
10930 self.op.disk_template,
10931 instance, pnode_name,
10934 self.instance_file_storage_dir,
10935 self.op.file_driver,
10938 self.cfg.GetGroupDiskParams(nodegroup))
10940 iobj = objects.Instance(name=instance, os=self.op.os_type,
10941 primary_node=pnode_name,
10942 nics=self.nics, disks=disks,
10943 disk_template=self.op.disk_template,
10944 admin_state=constants.ADMINST_DOWN,
10945 network_port=network_port,
10946 beparams=self.op.beparams,
10947 hvparams=self.op.hvparams,
10948 hypervisor=self.op.hypervisor,
10949 osparams=self.op.osparams,
10953 for tag in self.op.tags:
10956 if self.adopt_disks:
10957 if self.op.disk_template == constants.DT_PLAIN:
10958 # rename LVs to the newly-generated names; we need to construct
10959 # 'fake' LV disks with the old data, plus the new unique_id
10960 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10962 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10963 rename_to.append(t_dsk.logical_id)
10964 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10965 self.cfg.SetDiskID(t_dsk, pnode_name)
10966 result = self.rpc.call_blockdev_rename(pnode_name,
10967 zip(tmp_disks, rename_to))
10968 result.Raise("Failed to rename adoped LVs")
10970 feedback_fn("* creating instance disks...")
10972 _CreateDisks(self, iobj)
10973 except errors.OpExecError:
10974 self.LogWarning("Device creation failed, reverting...")
10976 _RemoveDisks(self, iobj)
10978 self.cfg.ReleaseDRBDMinors(instance)
10981 feedback_fn("adding instance %s to cluster config" % instance)
10983 self.cfg.AddInstance(iobj, self.proc.GetECId())
10985 # Declare that we don't want to remove the instance lock anymore, as we've
10986 # added the instance to the config
10987 del self.remove_locks[locking.LEVEL_INSTANCE]
10989 if self.op.mode == constants.INSTANCE_IMPORT:
10990 # Release unused nodes
10991 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10993 # Release all nodes
10994 _ReleaseLocks(self, locking.LEVEL_NODE)
10997 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10998 feedback_fn("* wiping instance disks...")
11000 _WipeDisks(self, iobj)
11001 except errors.OpExecError, err:
11002 logging.exception("Wiping disks failed")
11003 self.LogWarning("Wiping instance disks failed (%s)", err)
11007 # Something is already wrong with the disks, don't do anything else
11009 elif self.op.wait_for_sync:
11010 disk_abort = not _WaitForSync(self, iobj)
11011 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11012 # make sure the disks are not degraded (still sync-ing is ok)
11013 feedback_fn("* checking mirrors status")
11014 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11019 _RemoveDisks(self, iobj)
11020 self.cfg.RemoveInstance(iobj.name)
11021 # Make sure the instance lock gets removed
11022 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11023 raise errors.OpExecError("There are some degraded disks for"
11026 # Release all node resource locks
11027 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11029 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11030 # we need to set the disks ID to the primary node, since the
11031 # preceding code might or might not have done it, depending on
11032 # disk template and other options
11033 for disk in iobj.disks:
11034 self.cfg.SetDiskID(disk, pnode_name)
11035 if self.op.mode == constants.INSTANCE_CREATE:
11036 if not self.op.no_install:
11037 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11038 not self.op.wait_for_sync)
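# Pausing the DRBD sync while the OS is installed keeps the initial resync
# from competing with the installation I/O; the sync is resumed right after
# the OS scripts have finished.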
11040 feedback_fn("* pausing disk sync to install instance OS")
11041 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11044 for idx, success in enumerate(result.payload):
11046 logging.warn("pause-sync of instance %s for disk %d failed",
11049 feedback_fn("* running the instance OS create scripts...")
11050 # FIXME: pass debug option from opcode to backend
11052 os_add_result = self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11053 self.op.debug_level)
11055 feedback_fn("* resuming disk sync")
11056 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11059 for idx, success in enumerate(result.payload):
11061 logging.warn("resume-sync of instance %s for disk %d failed",
11064 os_add_result.Raise("Could not add os for instance %s"
11065 " on node %s" % (instance, pnode_name))
11068 if self.op.mode == constants.INSTANCE_IMPORT:
11069 feedback_fn("* running the instance OS import scripts...")
11073 for idx, image in enumerate(self.src_images):
11077 # FIXME: pass debug option from opcode to backend
11078 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11079 constants.IEIO_FILE, (image, ),
11080 constants.IEIO_SCRIPT,
11081 (iobj.disks[idx], idx),
11083 transfers.append(dt)
11086 masterd.instance.TransferInstanceData(self, feedback_fn,
11087 self.op.src_node, pnode_name,
11088 self.pnode.secondary_ip,
11090 if not compat.all(import_result):
11091 self.LogWarning("Some disks for instance %s on node %s were not"
11092 " imported successfully" % (instance, pnode_name))
11094 rename_from = self._old_instance_name
11096 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11097 feedback_fn("* preparing remote import...")
11098 # The source cluster will stop the instance before attempting to make
11099 # a connection. In some cases stopping an instance can take a long
11100 # time, hence the shutdown timeout is added to the connection timeout.
11102 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11103 self.op.source_shutdown_timeout)
11104 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11106 assert iobj.primary_node == self.pnode.name
11108 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11109 self.source_x509_ca,
11110 self._cds, timeouts)
11111 if not compat.all(disk_results):
11112 # TODO: Should the instance still be started, even if some disks
11113 # failed to import (valid for local imports, too)?
11114 self.LogWarning("Some disks for instance %s on node %s were not"
11115 " imported successfully" % (instance, pnode_name))
11117 rename_from = self.source_instance_name
11120 # also checked in the prereq part
11121 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11124 # Run rename script on newly imported instance
11125 assert iobj.name == instance
11126 feedback_fn("Running rename script for %s" % instance)
11127 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11129 self.op.debug_level)
11130 if result.fail_msg:
11131 self.LogWarning("Failed to run rename script for %s on node"
11132 " %s: %s" % (instance, pnode_name, result.fail_msg))
11134 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11137 iobj.admin_state = constants.ADMINST_UP
11138 self.cfg.Update(iobj, feedback_fn)
11139 logging.info("Starting instance %s on node %s", instance, pnode_name)
11140 feedback_fn("* starting instance...")
11141 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11143 result.Raise("Could not start instance")
11145 return list(iobj.all_nodes)
11148 class LUInstanceMultiAlloc(NoHooksLU):
11149 """Allocates multiple instances at the same time.
11154 def CheckArguments(self):
11155 """Check arguments.
11159 for inst in self.op.instances:
11160 if inst.iallocator is not None:
11161 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11162 " instance objects", errors.ECODE_INVAL)
11163 nodes.append(bool(inst.pnode))
11164 if inst.disk_template in constants.DTS_INT_MIRROR:
11165 nodes.append(bool(inst.snode))
11167 has_nodes = compat.any(nodes)
11168 if compat.all(nodes) ^ has_nodes:
11169 raise errors.OpPrereqError("There are instance objects providing"
11170 " pnode/snode while others do not",
11171 errors.ECODE_INVAL)
11173 if self.op.iallocator is None:
11174 default_iallocator = self.cfg.GetDefaultIAllocator()
11175 if default_iallocator and has_nodes:
11176 self.op.iallocator = default_iallocator
11178 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11179 " given and no cluster-wide default"
11180 " iallocator found; please specify either"
11181 " an iallocator or nodes on the instances"
11182 " or set a cluster-wide default iallocator",
11183 errors.ECODE_INVAL)
11185 _CheckOpportunisticLocking(self.op)
11187 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11189 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11190 utils.CommaJoin(dups), errors.ECODE_INVAL)
11192 def ExpandNames(self):
11193 """Calculate the locks.
11196 self.share_locks = _ShareAll()
11197 self.needed_locks = {
11198 # The iallocator will select nodes; even if no iallocator is used,
11199 # collisions with LUInstanceCreate should be avoided
11200 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11203 if self.op.iallocator:
11204 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11205 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11207 if self.op.opportunistic_locking:
11208 self.opportunistic_locks[locking.LEVEL_NODE] = True
11209 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11212 for inst in self.op.instances:
11213 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11214 nodeslist.append(inst.pnode)
11215 if inst.snode is not None:
11216 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11217 nodeslist.append(inst.snode)
11219 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11220 # Lock resources of instance's primary and secondary nodes (copy to
11221 # prevent accidental modification)
11222 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11224 def CheckPrereq(self):
11225 """Check prerequisite.
11228 cluster = self.cfg.GetClusterInfo()
11229 default_vg = self.cfg.GetVGName()
11230 ec_id = self.proc.GetECId()
11232 if self.op.opportunistic_locking:
11233 # Only consider nodes for which a lock is held
11234 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11236 node_whitelist = None
11238 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11239 _ComputeNics(op, cluster, None,
11241 _ComputeFullBeParams(op, cluster),
11243 for op in self.op.instances]
11245 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11246 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11248 ial.Run(self.op.iallocator)
11250 if not ial.success:
11251 raise errors.OpPrereqError("Can't compute nodes using"
11252 " iallocator '%s': %s" %
11253 (self.op.iallocator, ial.info),
11254 errors.ECODE_NORES)
11256 self.ia_result = ial.result
11258 if self.op.dry_run:
11259 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11260 constants.JOB_IDS_KEY: [],
11263 def _ConstructPartialResult(self):
11264 """Contructs the partial result.
11267 (allocatable, failed) = self.ia_result
11269 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11270 map(compat.fst, allocatable),
11271 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11274 def Exec(self, feedback_fn):
11275 """Executes the opcode.
11278 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11279 (allocatable, failed) = self.ia_result
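# ia_result is a pair: a list of (instance_name, [node, ...]) tuples for the
# instances that could be allocated, and a list of names that could not.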
11282 for (name, nodes) in allocatable:
11283 op = op2inst.pop(name)
11286 (op.pnode, op.snode) = nodes
11288 (op.pnode,) = nodes
11292 missing = set(op2inst.keys()) - set(failed)
11293 assert not missing, \
11294 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11296 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11299 def _CheckRADOSFreeSpace():
11300 """Compute disk size requirements inside the RADOS cluster.
11303 # For the RADOS cluster we assume there is always enough space.
11307 class LUInstanceConsole(NoHooksLU):
11308 """Connect to an instance's console.
11310 This is somewhat special in that it returns the command line that
11311 you need to run on the master node in order to connect to the console.
11317 def ExpandNames(self):
11318 self.share_locks = _ShareAll()
11319 self._ExpandAndLockInstance()
11321 def CheckPrereq(self):
11322 """Check prerequisites.
11324 This checks that the instance is in the cluster.
11327 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11328 assert self.instance is not None, \
11329 "Cannot retrieve locked instance %s" % self.op.instance_name
11330 _CheckNodeOnline(self, self.instance.primary_node)
11332 def Exec(self, feedback_fn):
11333 """Connect to the console of an instance
11336 instance = self.instance
11337 node = instance.primary_node
11339 node_insts = self.rpc.call_instance_list([node],
11340 [instance.hypervisor])[node]
11341 node_insts.Raise("Can't get node information from %s" % node)
11343 if instance.name not in node_insts.payload:
11344 if instance.admin_state == constants.ADMINST_UP:
11345 state = constants.INSTST_ERRORDOWN
11346 elif instance.admin_state == constants.ADMINST_DOWN:
11347 state = constants.INSTST_ADMINDOWN
11349 state = constants.INSTST_ADMINOFFLINE
11350 raise errors.OpExecError("Instance %s is not running (state %s)" %
11351 (instance.name, state))
11353 logging.debug("Connecting to console of %s on %s", instance.name, node)
11355 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11358 def _GetInstanceConsole(cluster, instance):
11359 """Returns console information for an instance.
11361 @type cluster: L{objects.Cluster}
11362 @type instance: L{objects.Instance}
11366 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11367 # beparams and hvparams are passed separately, to avoid editing the
11368 # instance and then saving the defaults in the instance itself.
11369 hvparams = cluster.FillHV(instance)
11370 beparams = cluster.FillBE(instance)
11371 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11373 assert console.instance == instance.name
11374 assert console.Validate()
11376 return console.ToDict()
11379 class LUInstanceReplaceDisks(LogicalUnit):
11380 """Replace the disks of an instance.
11383 HPATH = "mirrors-replace"
11384 HTYPE = constants.HTYPE_INSTANCE
11387 def CheckArguments(self):
11388 """Check arguments.
11391 remote_node = self.op.remote_node
11392 ialloc = self.op.iallocator
11393 if self.op.mode == constants.REPLACE_DISK_CHG:
11394 if remote_node is None and ialloc is None:
11395 raise errors.OpPrereqError("When changing the secondary either an"
11396 " iallocator script must be used or the"
11397 " new node given", errors.ECODE_INVAL)
11399 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11401 elif remote_node is not None or ialloc is not None:
11402 # Not replacing the secondary
11403 raise errors.OpPrereqError("The iallocator and new node options can"
11404 " only be used when changing the"
11405 " secondary node", errors.ECODE_INVAL)
11407 def ExpandNames(self):
11408 self._ExpandAndLockInstance()
11410 assert locking.LEVEL_NODE not in self.needed_locks
11411 assert locking.LEVEL_NODE_RES not in self.needed_locks
11412 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11414 assert self.op.iallocator is None or self.op.remote_node is None, \
11415 "Conflicting options"
11417 if self.op.remote_node is not None:
11418 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11420 # Warning: do not remove the locking of the new secondary here
11421 # unless DRBD8.AddChildren is changed to work in parallel;
11422 # currently it doesn't since parallel invocations of
11423 # FindUnusedMinor will conflict
11424 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11425 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11427 self.needed_locks[locking.LEVEL_NODE] = []
11428 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11430 if self.op.iallocator is not None:
11431 # iallocator will select a new node in the same group
11432 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11433 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11435 self.needed_locks[locking.LEVEL_NODE_RES] = []
11437 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11438 self.op.iallocator, self.op.remote_node,
11439 self.op.disks, self.op.early_release,
11440 self.op.ignore_ipolicy)
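# All the actual disk replacement work is delegated to the TLReplaceDisks
# tasklet below; this LU only deals with locking, hooks and argument checks.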
11442 self.tasklets = [self.replacer]
11444 def DeclareLocks(self, level):
11445 if level == locking.LEVEL_NODEGROUP:
11446 assert self.op.remote_node is None
11447 assert self.op.iallocator is not None
11448 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11450 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11451 # Lock all groups used by instance optimistically; this requires going
11452 # via the node before it's locked, requiring verification later on
11453 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11454 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11456 elif level == locking.LEVEL_NODE:
11457 if self.op.iallocator is not None:
11458 assert self.op.remote_node is None
11459 assert not self.needed_locks[locking.LEVEL_NODE]
11460 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11462 # Lock member nodes of all locked groups
11463 self.needed_locks[locking.LEVEL_NODE] = \
11465 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11466 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11468 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11470 self._LockInstancesNodes()
11472 elif level == locking.LEVEL_NODE_RES:
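# The node resource locks simply mirror the regular node locks computed at
# the LEVEL_NODE stage.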
11474 self.needed_locks[locking.LEVEL_NODE_RES] = \
11475 self.needed_locks[locking.LEVEL_NODE]
11477 def BuildHooksEnv(self):
11478 """Build hooks env.
11480 This runs on the master, the primary and all the secondaries.
11483 instance = self.replacer.instance
11485 "MODE": self.op.mode,
11486 "NEW_SECONDARY": self.op.remote_node,
11487 "OLD_SECONDARY": instance.secondary_nodes[0],
11489 env.update(_BuildInstanceHookEnvByObject(self, instance))
11492 def BuildHooksNodes(self):
11493 """Build hooks nodes.
11496 instance = self.replacer.instance
11498 self.cfg.GetMasterNode(),
11499 instance.primary_node,
11501 if self.op.remote_node is not None:
11502 nl.append(self.op.remote_node)
11505 def CheckPrereq(self):
11506 """Check prerequisites.
11509 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11510 self.op.iallocator is None)
11512 # Verify if node group locks are still correct
11513 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11515 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11517 return LogicalUnit.CheckPrereq(self)
11520 class TLReplaceDisks(Tasklet):
11521 """Replaces disks for an instance.
11523 Note: Locking is not within the scope of this class.
11526 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11527 disks, early_release, ignore_ipolicy):
11528 """Initializes this class.
11531 Tasklet.__init__(self, lu)
11534 self.instance_name = instance_name
11536 self.iallocator_name = iallocator_name
11537 self.remote_node = remote_node
11539 self.early_release = early_release
11540 self.ignore_ipolicy = ignore_ipolicy
11543 self.instance = None
11544 self.new_node = None
11545 self.target_node = None
11546 self.other_node = None
11547 self.remote_node_info = None
11548 self.node_secondary_ip = None
11551 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11552 """Compute a new secondary node using an IAllocator.
11555 req = iallocator.IAReqRelocate(name=instance_name,
11556 relocate_from=list(relocate_from))
11557 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11559 ial.Run(iallocator_name)
11561 if not ial.success:
11562 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11563 " %s" % (iallocator_name, ial.info),
11564 errors.ECODE_NORES)
11566 remote_node_name = ial.result[0]
11568 lu.LogInfo("Selected new secondary for instance '%s': %s",
11569 instance_name, remote_node_name)
11571 return remote_node_name
11573 def _FindFaultyDisks(self, node_name):
11574 """Wrapper for L{_FindFaultyInstanceDisks}.
11577 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11580 def _CheckDisksActivated(self, instance):
11581 """Checks if the instance disks are activated.
11583 @param instance: The instance whose disks to check
11584 @return: True if they are activated, False otherwise
11587 nodes = instance.all_nodes
11589 for idx, dev in enumerate(instance.disks):
11591 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11592 self.cfg.SetDiskID(dev, node)
11594 result = _BlockdevFind(self, node, dev, instance)
11598 elif result.fail_msg or not result.payload:
11603 def CheckPrereq(self):
11604 """Check prerequisites.
11606 This checks that the instance is in the cluster.
11609 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11610 assert instance is not None, \
11611 "Cannot retrieve locked instance %s" % self.instance_name
11613 if instance.disk_template != constants.DT_DRBD8:
11614 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11615 " instances", errors.ECODE_INVAL)
11617 if len(instance.secondary_nodes) != 1:
11618 raise errors.OpPrereqError("The instance has a strange layout,"
11619 " expected one secondary but found %d" %
11620 len(instance.secondary_nodes),
11621 errors.ECODE_FAULT)
11623 instance = self.instance
11624 secondary_node = instance.secondary_nodes[0]
11626 if self.iallocator_name is None:
11627 remote_node = self.remote_node
11629 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11630 instance.name, instance.secondary_nodes)
11632 if remote_node is None:
11633 self.remote_node_info = None
11635 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11636 "Remote node '%s' is not locked" % remote_node
11638 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11639 assert self.remote_node_info is not None, \
11640 "Cannot retrieve locked node %s" % remote_node
11642 if remote_node == self.instance.primary_node:
11643 raise errors.OpPrereqError("The specified node is the primary node of"
11644 " the instance", errors.ECODE_INVAL)
11646 if remote_node == secondary_node:
11647 raise errors.OpPrereqError("The specified node is already the"
11648 " secondary node of the instance",
11649 errors.ECODE_INVAL)
11651 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11652 constants.REPLACE_DISK_CHG):
11653 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11654 errors.ECODE_INVAL)
11656 if self.mode == constants.REPLACE_DISK_AUTO:
11657 if not self._CheckDisksActivated(instance):
11658 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11659 " first" % self.instance_name,
11660 errors.ECODE_STATE)
11661 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11662 faulty_secondary = self._FindFaultyDisks(secondary_node)
11664 if faulty_primary and faulty_secondary:
11665 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11666 " one node and can not be repaired"
11667 " automatically" % self.instance_name,
11668 errors.ECODE_STATE)
11671 self.disks = faulty_primary
11672 self.target_node = instance.primary_node
11673 self.other_node = secondary_node
11674 check_nodes = [self.target_node, self.other_node]
11675 elif faulty_secondary:
11676 self.disks = faulty_secondary
11677 self.target_node = secondary_node
11678 self.other_node = instance.primary_node
11679 check_nodes = [self.target_node, self.other_node]
11685 # Non-automatic modes
11686 if self.mode == constants.REPLACE_DISK_PRI:
11687 self.target_node = instance.primary_node
11688 self.other_node = secondary_node
11689 check_nodes = [self.target_node, self.other_node]
11691 elif self.mode == constants.REPLACE_DISK_SEC:
11692 self.target_node = secondary_node
11693 self.other_node = instance.primary_node
11694 check_nodes = [self.target_node, self.other_node]
11696 elif self.mode == constants.REPLACE_DISK_CHG:
11697 self.new_node = remote_node
11698 self.other_node = instance.primary_node
11699 self.target_node = secondary_node
11700 check_nodes = [self.new_node, self.other_node]
11702 _CheckNodeNotDrained(self.lu, remote_node)
11703 _CheckNodeVmCapable(self.lu, remote_node)
11705 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11706 assert old_node_info is not None
11707 if old_node_info.offline and not self.early_release:
11708 # doesn't make sense to delay the release
11709 self.early_release = True
11710 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11711 " early-release mode", secondary_node)
11714 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11717 # If not specified all disks should be replaced
11719 self.disks = range(len(self.instance.disks))
11721 # TODO: This is ugly, but right now we can't distinguish between internal
11722 # submitted opcode and external one. We should fix that.
11723 if self.remote_node_info:
11724 # We change the node, lets verify it still meets instance policy
11725 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11726 cluster = self.cfg.GetClusterInfo()
11727 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11729 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11730 ignore=self.ignore_ipolicy)
11732 for node in check_nodes:
11733 _CheckNodeOnline(self.lu, node)
11735 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11738 if node_name is not None)
11740 # Release unneeded node and node resource locks
11741 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11742 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11743 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11745 # Release any owned node group
11746 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11748 # Check whether disks are valid
11749 for disk_idx in self.disks:
11750 instance.FindDisk(disk_idx)
11752 # Get secondary node IP addresses
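# (these are needed later when reconfiguring the DRBD network links between
# the primary and the (new) secondary node)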
11753 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11754 in self.cfg.GetMultiNodeInfo(touched_nodes))
11756 def Exec(self, feedback_fn):
11757 """Execute disk replacement.
11759 This dispatches the disk replacement to the appropriate handler.
11763 # Verify owned locks before starting operation
11764 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11765 assert set(owned_nodes) == set(self.node_secondary_ip), \
11766 ("Incorrect node locks, owning %s, expected %s" %
11767 (owned_nodes, self.node_secondary_ip.keys()))
11768 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11769 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11770 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11772 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11773 assert list(owned_instances) == [self.instance_name], \
11774 "Instance '%s' not locked" % self.instance_name
11776 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11777 "Should not own any node group lock at this point"
11780 feedback_fn("No disks need replacement for instance '%s'" %
11781 self.instance.name)
11784 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11785 (utils.CommaJoin(self.disks), self.instance.name))
11786 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11787 feedback_fn("Current seconary node: %s" %
11788 utils.CommaJoin(self.instance.secondary_nodes))
11790 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11792 # Activate the instance disks if we're replacing them on a down instance
11794 _StartInstanceDisks(self.lu, self.instance, True)
11797 # Should we replace the secondary node?
11798 if self.new_node is not None:
11799 fn = self._ExecDrbd8Secondary
11801 fn = self._ExecDrbd8DiskOnly
11803 result = fn(feedback_fn)
11805 # Deactivate the instance disks if we're replacing them on a
11808 _SafeShutdownInstanceDisks(self.lu, self.instance)
11810 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11813 # Verify owned locks
11814 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11815 nodes = frozenset(self.node_secondary_ip)
11816 assert ((self.early_release and not owned_nodes) or
11817 (not self.early_release and not (set(owned_nodes) - nodes))), \
11818 ("Not owning the correct locks, early_release=%s, owned=%r,"
11819 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11823 def _CheckVolumeGroup(self, nodes):
11824 self.lu.LogInfo("Checking volume groups")
11826 vgname = self.cfg.GetVGName()
11828 # Make sure volume group exists on all involved nodes
11829 results = self.rpc.call_vg_list(nodes)
11831 raise errors.OpExecError("Can't list volume groups on the nodes")
11834 res = results[node]
11835 res.Raise("Error checking node %s" % node)
11836 if vgname not in res.payload:
11837 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11840 def _CheckDisksExistence(self, nodes):
11841 # Check disk existence
11842 for idx, dev in enumerate(self.instance.disks):
11843 if idx not in self.disks:
11847 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11848 self.cfg.SetDiskID(dev, node)
11850 result = _BlockdevFind(self, node, dev, self.instance)
11852 msg = result.fail_msg
11853 if msg or not result.payload:
11855 msg = "disk not found"
11856 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11859 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11860 for idx, dev in enumerate(self.instance.disks):
11861 if idx not in self.disks:
11864 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11867 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11868 on_primary, ldisk=ldisk):
11869 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11870 " replace disks for instance %s" %
11871 (node_name, self.instance.name))
11873 def _CreateNewStorage(self, node_name):
11874 """Create new storage on the primary or secondary node.
11876 This is only used for same-node replaces, not for changing the
11877 secondary node, hence we don't want to modify the existing disk.
11882 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11883 for idx, dev in enumerate(disks):
11884 if idx not in self.disks:
11887 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11889 self.cfg.SetDiskID(dev, node_name)
11891 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11892 names = _GenerateUniqueNames(self.lu, lv_names)
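# A DRBD8 disk has exactly two LV children: the data volume and a small
# metadata volume (DRBD_META_SIZE); the replacement LVs mirror that layout.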
11894 (data_disk, meta_disk) = dev.children
11895 vg_data = data_disk.logical_id[0]
11896 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11897 logical_id=(vg_data, names[0]),
11898 params=data_disk.params)
11899 vg_meta = meta_disk.logical_id[0]
11900 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11901 size=constants.DRBD_META_SIZE,
11902 logical_id=(vg_meta, names[1]),
11903 params=meta_disk.params)
11905 new_lvs = [lv_data, lv_meta]
11906 old_lvs = [child.Copy() for child in dev.children]
11907 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11908 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11910 # we pass force_create=True to force the LVM creation
11911 for new_lv in new_lvs:
11912 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11913 _GetInstanceInfoText(self.instance), False,
11918 def _CheckDevices(self, node_name, iv_names):
11919 for name, (dev, _, _) in iv_names.iteritems():
11920 self.cfg.SetDiskID(dev, node_name)
11922 result = _BlockdevFind(self, node_name, dev, self.instance)
11924 msg = result.fail_msg
11925 if msg or not result.payload:
11927 msg = "disk not found"
11928 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11931 if result.payload.is_degraded:
11932 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11934 def _RemoveOldStorage(self, node_name, iv_names):
11935 for name, (_, old_lvs, _) in iv_names.iteritems():
11936 self.lu.LogInfo("Remove logical volumes for %s", name)
11939 self.cfg.SetDiskID(lv, node_name)
11941 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11943 self.lu.LogWarning("Can't remove old LV: %s", msg,
11944 hint="remove unused LVs manually")
11946 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11947 """Replace a disk on the primary or secondary for DRBD 8.
11949 The algorithm for replace is quite complicated:
11951 1. for each disk to be replaced:
11953 1. create new LVs on the target node with unique names
11954 1. detach old LVs from the drbd device
11955 1. rename old LVs to name_replaced.<time_t>
11956 1. rename new LVs to old LVs
11957 1. attach the new LVs (with the old names now) to the drbd device
11959 1. wait for sync across all devices
11961 1. for each modified disk:
11963 1. remove old LVs (which have the name name_replaced.<time_t>)
11965 Failures are not very well handled.
11970 # Step: check device activation
11971 self.lu.LogStep(1, steps_total, "Check device existence")
11972 self._CheckDisksExistence([self.other_node, self.target_node])
11973 self._CheckVolumeGroup([self.target_node, self.other_node])
11975 # Step: check other node consistency
11976 self.lu.LogStep(2, steps_total, "Check peer consistency")
11977 self._CheckDisksConsistency(self.other_node,
11978 self.other_node == self.instance.primary_node,
11981 # Step: create new storage
11982 self.lu.LogStep(3, steps_total, "Allocate new storage")
11983 iv_names = self._CreateNewStorage(self.target_node)
11985 # Step: for each lv, detach+rename*2+attach
11986 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11987 for dev, old_lvs, new_lvs in iv_names.itervalues():
11988 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11990 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11992 result.Raise("Can't detach drbd from local storage on node"
11993 " %s for device %s" % (self.target_node, dev.iv_name))
11995 #cfg.Update(instance)
11997 # ok, we created the new LVs, so now we know we have the needed
11998 # storage; as such, we proceed on the target node to rename
11999 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12000 # using the assumption that logical_id == physical_id (which in
12001 # turn is the unique_id on that node)
12003 # FIXME(iustin): use a better name for the replaced LVs
12004 temp_suffix = int(time.time())
12005 ren_fn = lambda d, suff: (d.physical_id[0],
12006 d.physical_id[1] + "_replaced-%s" % suff)
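# Illustrative example (hypothetical names): an LV with physical_id
# ('xenvg', 'abc.disk0_data') would be renamed to
# ('xenvg', 'abc.disk0_data_replaced-1400000000').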
12008 # Build the rename list based on what LVs exist on the node
12009 rename_old_to_new = []
12010 for to_ren in old_lvs:
12011 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12012 if not result.fail_msg and result.payload:
12014 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12016 self.lu.LogInfo("Renaming the old LVs on the target node")
12017 result = self.rpc.call_blockdev_rename(self.target_node,
12019 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12021 # Now we rename the new LVs to the old LVs
12022 self.lu.LogInfo("Renaming the new LVs on the target node")
12023 rename_new_to_old = [(new, old.physical_id)
12024 for old, new in zip(old_lvs, new_lvs)]
12025 result = self.rpc.call_blockdev_rename(self.target_node,
12027 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12029 # Intermediate steps of in memory modifications
12030 for old, new in zip(old_lvs, new_lvs):
12031 new.logical_id = old.logical_id
12032 self.cfg.SetDiskID(new, self.target_node)
12034 # We need to modify old_lvs so that removal later removes the
12035 # right LVs, not the newly added ones; note that old_lvs is a
12037 for disk in old_lvs:
12038 disk.logical_id = ren_fn(disk, temp_suffix)
12039 self.cfg.SetDiskID(disk, self.target_node)
12041 # Now that the new lvs have the old name, we can add them to the device
12042 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12043 result = self.rpc.call_blockdev_addchildren(self.target_node,
12044 (dev, self.instance), new_lvs)
12045 msg = result.fail_msg
12047 for new_lv in new_lvs:
12048 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12051 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12052 hint=("cleanup manually the unused logical"
12054 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12056 cstep = itertools.count(5)
12058 if self.early_release:
12059 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12060 self._RemoveOldStorage(self.target_node, iv_names)
12061 # TODO: Check if releasing locks early still makes sense
12062 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12064 # Release all resource locks except those used by the instance
12065 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12066 keep=self.node_secondary_ip.keys())
12068 # Release all node locks while waiting for sync
12069 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12071 # TODO: Can the instance lock be downgraded here? Take the optional disk
12072 # shutdown in the caller into consideration.
12075 # This can fail as the old devices are degraded and _WaitForSync
12076 # does a combined result over all disks, so we don't check its return value
12077 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12078 _WaitForSync(self.lu, self.instance)
12080 # Check all devices manually
12081 self._CheckDevices(self.instance.primary_node, iv_names)
12083 # Step: remove old storage
12084 if not self.early_release:
12085 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12086 self._RemoveOldStorage(self.target_node, iv_names)
12088 def _ExecDrbd8Secondary(self, feedback_fn):
12089 """Replace the secondary node for DRBD 8.
12091 The algorithm for replace is quite complicated:
12092 - for all disks of the instance:
12093 - create new LVs on the new node with same names
12094 - shutdown the drbd device on the old secondary
12095 - disconnect the drbd network on the primary
12096 - create the drbd device on the new secondary
12097 - network attach the drbd on the primary, using an artifice:
12098 the drbd code for Attach() will connect to the network if it
12099 finds a device which is connected to the good local disks but
12100 not network enabled
12101 - wait for sync across all devices
12102 - remove all disks from the old secondary
12104 Failures are not very well handled.
12109 pnode = self.instance.primary_node
12111 # Step: check device activation
12112 self.lu.LogStep(1, steps_total, "Check device existence")
12113 self._CheckDisksExistence([self.instance.primary_node])
12114 self._CheckVolumeGroup([self.instance.primary_node])
12116 # Step: check other node consistency
12117 self.lu.LogStep(2, steps_total, "Check peer consistency")
12118 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12120 # Step: create new storage
12121 self.lu.LogStep(3, steps_total, "Allocate new storage")
12122 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12123 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12124 for idx, dev in enumerate(disks):
12125 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12126 (self.new_node, idx))
12127 # we pass force_create=True to force LVM creation
12128 for new_lv in dev.children:
12129 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12130 True, _GetInstanceInfoText(self.instance), False,
12133 # Step 4: DRBD minors and DRBD setup changes
12134 # after this, we must manually remove the drbd minors on both the
12135 # error and the success paths
12136 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12137 minors = self.cfg.AllocateDRBDMinor([self.new_node
12138 for dev in self.instance.disks],
12139 self.instance.name)
12140 logging.debug("Allocated minors %r", minors)
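# One new DRBD minor is reserved on the new node for every disk of the
# instance; they must be released again on every error path below.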
12143 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12144 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12145 (self.new_node, idx))
12146 # create new devices on new_node; note that we create two IDs:
12147 # one without port, so the drbd will be activated without
12148 # networking information on the new node at this stage, and one
12149 # with network, for the latter activation in step 4
12150 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12151 if self.instance.primary_node == o_node1:
12154 assert self.instance.primary_node == o_node2, "Three-node instance?"
12157 new_alone_id = (self.instance.primary_node, self.new_node, None,
12158 p_minor, new_minor, o_secret)
12159 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12160 p_minor, new_minor, o_secret)
12162 iv_names[idx] = (dev, dev.children, new_net_id)
12163 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12165 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12166 logical_id=new_alone_id,
12167 children=dev.children,
12170 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12173 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12175 _GetInstanceInfoText(self.instance), False,
12177 except errors.GenericError:
12178 self.cfg.ReleaseDRBDMinors(self.instance.name)
12181 # We have new devices, shutdown the drbd on the old secondary
12182 for idx, dev in enumerate(self.instance.disks):
12183 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12184 self.cfg.SetDiskID(dev, self.target_node)
12185 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12186 (dev, self.instance)).fail_msg
12188 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12189 "node: %s" % (idx, msg),
12190 hint=("Please cleanup this device manually as"
12191 " soon as possible"))
12193 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12194 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12195 self.instance.disks)[pnode]
12197 msg = result.fail_msg
12199 # detaches didn't succeed (unlikely)
12200 self.cfg.ReleaseDRBDMinors(self.instance.name)
12201 raise errors.OpExecError("Can't detach the disks from the network on"
12202 " old node: %s" % (msg,))
12204 # if we managed to detach at least one, we update all the disks of
12205 # the instance to point to the new secondary
12206 self.lu.LogInfo("Updating instance configuration")
12207 for dev, _, new_logical_id in iv_names.itervalues():
12208 dev.logical_id = new_logical_id
12209 self.cfg.SetDiskID(dev, self.instance.primary_node)
12211 self.cfg.Update(self.instance, feedback_fn)
12213 # Release all node locks (the configuration has been updated)
12214 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12216 # and now perform the drbd attach
12217 self.lu.LogInfo("Attaching primary drbds to new secondary"
12218 " (standalone => connected)")
12219 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12221 self.node_secondary_ip,
12222 (self.instance.disks, self.instance),
12223 self.instance.name,
12225 for to_node, to_result in result.items():
12226 msg = to_result.fail_msg
12228 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12230 hint=("please do a gnt-instance info to see the"
12231 " status of disks"))
12233 cstep = itertools.count(5)
12235 if self.early_release:
12236 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12237 self._RemoveOldStorage(self.target_node, iv_names)
12238 # TODO: Check if releasing locks early still makes sense
12239 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12241 # Release all resource locks except those used by the instance
12242 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12243 keep=self.node_secondary_ip.keys())
12245 # TODO: Can the instance lock be downgraded here? Take the optional disk
12246 # shutdown in the caller into consideration.
12249 # This can fail as the old devices are degraded and _WaitForSync
12250 # does a combined result over all disks, so we don't check its return value
12251 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12252 _WaitForSync(self.lu, self.instance)
12254 # Check all devices manually
12255 self._CheckDevices(self.instance.primary_node, iv_names)
12257 # Step: remove old storage
12258 if not self.early_release:
12259 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12260 self._RemoveOldStorage(self.target_node, iv_names)
12263 class LURepairNodeStorage(NoHooksLU):
12264 """Repairs the volume group on a node.
12269 def CheckArguments(self):
12270 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12272 storage_type = self.op.storage_type
12274 if (constants.SO_FIX_CONSISTENCY not in
12275 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12276 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12277 " repaired" % storage_type,
12278 errors.ECODE_INVAL)
12280 def ExpandNames(self):
12281 self.needed_locks = {
12282 locking.LEVEL_NODE: [self.op.node_name],
12285 def _CheckFaultyDisks(self, instance, node_name):
12286 """Ensure faulty disks abort the opcode or at least warn."""
12288 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12290 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12291 " node '%s'" % (instance.name, node_name),
12292 errors.ECODE_STATE)
12293 except errors.OpPrereqError, err:
12294 if self.op.ignore_consistency:
12295 self.LogWarning(str(err.args[0]))
12299 def CheckPrereq(self):
12300 """Check prerequisites.
12303 # Check whether any instance on this node has faulty disks
12304 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12305 if inst.admin_state != constants.ADMINST_UP:
12307 check_nodes = set(inst.all_nodes)
12308 check_nodes.discard(self.op.node_name)
12309 for inst_node_name in check_nodes:
12310 self._CheckFaultyDisks(inst, inst_node_name)
12312 def Exec(self, feedback_fn):
12313 feedback_fn("Repairing storage unit '%s' on %s ..." %
12314 (self.op.name, self.op.node_name))
12316 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12317 result = self.rpc.call_storage_execute(self.op.node_name,
12318 self.op.storage_type, st_args,
12320 constants.SO_FIX_CONSISTENCY)
12321 result.Raise("Failed to repair storage unit '%s' on %s" %
12322 (self.op.name, self.op.node_name))
12325 class LUNodeEvacuate(NoHooksLU):
12326 """Evacuates instances off a list of nodes.
12331 _MODE2IALLOCATOR = {
12332 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12333 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12334 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12336 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12337 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12338 constants.IALLOCATOR_NEVAC_MODES)
12340 def CheckArguments(self):
12341 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12343 def ExpandNames(self):
12344 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12346 if self.op.remote_node is not None:
12347 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12348 assert self.op.remote_node
12350 if self.op.remote_node == self.op.node_name:
12351 raise errors.OpPrereqError("Can not use evacuated node as a new"
12352 " secondary node", errors.ECODE_INVAL)
12354 if self.op.mode != constants.NODE_EVAC_SEC:
12355 raise errors.OpPrereqError("Without the use of an iallocator only"
12356 " secondary instances can be evacuated",
12357 errors.ECODE_INVAL)
12360 self.share_locks = _ShareAll()
12361 self.needed_locks = {
12362 locking.LEVEL_INSTANCE: [],
12363 locking.LEVEL_NODEGROUP: [],
12364 locking.LEVEL_NODE: [],
12367 # Determine nodes (via group) optimistically, needs verification once locks
12368 # have been acquired
12369 self.lock_nodes = self._DetermineNodes()
12371 def _DetermineNodes(self):
12372 """Gets the list of nodes to operate on.
12375 if self.op.remote_node is None:
12376 # Iallocator will choose any node(s) in the same group
12377 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12379 group_nodes = frozenset([self.op.remote_node])
12381 # Determine nodes to be locked
12382 return set([self.op.node_name]) | group_nodes
12384 def _DetermineInstances(self):
12385 """Builds list of instances to operate on.
12388 assert self.op.mode in constants.NODE_EVAC_MODES
12390 if self.op.mode == constants.NODE_EVAC_PRI:
12391 # Primary instances only
12392 inst_fn = _GetNodePrimaryInstances
12393 assert self.op.remote_node is None, \
12394 "Evacuating primary instances requires iallocator"
12395 elif self.op.mode == constants.NODE_EVAC_SEC:
12396 # Secondary instances only
12397 inst_fn = _GetNodeSecondaryInstances
12400 assert self.op.mode == constants.NODE_EVAC_ALL
12401 inst_fn = _GetNodeInstances
12402 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12404 raise errors.OpPrereqError("Due to an issue with the iallocator"
12405 " interface it is not possible to evacuate"
12406 " all instances at once; specify explicitly"
12407 " whether to evacuate primary or secondary"
12409 errors.ECODE_INVAL)
12411 return inst_fn(self.cfg, self.op.node_name)
12413 def DeclareLocks(self, level):
12414 if level == locking.LEVEL_INSTANCE:
12415 # Lock instances optimistically, needs verification once node and group
12416 # locks have been acquired
12417 self.needed_locks[locking.LEVEL_INSTANCE] = \
12418 set(i.name for i in self._DetermineInstances())
12420 elif level == locking.LEVEL_NODEGROUP:
12421 # Lock node groups for all potential target nodes optimistically, needs
12422 # verification once nodes have been acquired
12423 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12424 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12426 elif level == locking.LEVEL_NODE:
12427 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12429 def CheckPrereq(self):
12431 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12432 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12433 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12435 need_nodes = self._DetermineNodes()
12437 if not owned_nodes.issuperset(need_nodes):
12438 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12439 " locks were acquired, current nodes are"
12440 " are '%s', used to be '%s'; retry the"
12442 (self.op.node_name,
12443 utils.CommaJoin(need_nodes),
12444 utils.CommaJoin(owned_nodes)),
12445 errors.ECODE_STATE)
12447 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12448 if owned_groups != wanted_groups:
12449 raise errors.OpExecError("Node groups changed since locks were acquired,"
12450 " current groups are '%s', used to be '%s';"
12451 " retry the operation" %
12452 (utils.CommaJoin(wanted_groups),
12453 utils.CommaJoin(owned_groups)))
12455 # Determine affected instances
12456 self.instances = self._DetermineInstances()
12457 self.instance_names = [i.name for i in self.instances]
12459 if set(self.instance_names) != owned_instances:
12460 raise errors.OpExecError("Instances on node '%s' changed since locks"
12461 " were acquired, current instances are '%s',"
12462 " used to be '%s'; retry the operation" %
12463 (self.op.node_name,
12464 utils.CommaJoin(self.instance_names),
12465 utils.CommaJoin(owned_instances)))
12467 if self.instance_names:
12468 self.LogInfo("Evacuating instances from node '%s': %s",
12470 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12472 self.LogInfo("No instances to evacuate from node '%s'",
12475 if self.op.remote_node is not None:
12476 for i in self.instances:
12477 if i.primary_node == self.op.remote_node:
12478 raise errors.OpPrereqError("Node %s is the primary node of"
12479 " instance %s, cannot use it as"
12481 (self.op.remote_node, i.name),
12482 errors.ECODE_INVAL)
12484 def Exec(self, feedback_fn):
12485 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12487 if not self.instance_names:
12488 # No instances to evacuate
12491 elif self.op.iallocator is not None:
12492 # TODO: Implement relocation to other group
12493 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12494 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12495 instances=list(self.instance_names))
12496 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12498 ial.Run(self.op.iallocator)
12500 if not ial.success:
12501 raise errors.OpPrereqError("Can't compute node evacuation using"
12502 " iallocator '%s': %s" %
12503 (self.op.iallocator, ial.info),
12504 errors.ECODE_NORES)
12506 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12508 elif self.op.remote_node is not None:
12509 assert self.op.mode == constants.NODE_EVAC_SEC
12511 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12512 remote_node=self.op.remote_node,
12514 mode=constants.REPLACE_DISK_CHG,
12515 early_release=self.op.early_release)]
12516 for instance_name in self.instance_names]
12519 raise errors.ProgrammerError("No iallocator or remote node")
12521 return ResultWithJobs(jobs)
12524 def _SetOpEarlyRelease(early_release, op):
12525 """Sets C{early_release} flag on opcodes if available.
12529 op.early_release = early_release
12530 except AttributeError:
12531 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12536 def _NodeEvacDest(use_nodes, group, nodes):
12537 """Returns group or nodes depending on caller's choice.
12541 return utils.CommaJoin(nodes)
12546 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12547 """Unpacks the result of change-group and node-evacuate iallocator requests.
12549 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12550 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12552 @type lu: L{LogicalUnit}
12553 @param lu: Logical unit instance
12554 @type alloc_result: tuple/list
12555 @param alloc_result: Result from iallocator
12556 @type early_release: bool
12557 @param early_release: Whether to release locks early if possible
12558 @type use_nodes: bool
12559 @param use_nodes: Whether to display node names instead of groups
12562 (moved, failed, jobs) = alloc_result
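# Expected shapes (as consumed below): moved is a list of
# (instance_name, group, [node, ...]) tuples, failed a list of
# (instance_name, reason) pairs and jobs a list of lists of serialized
# opcodes.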
12565 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12566 for (name, reason) in failed)
12567 lu.LogWarning("Unable to evacuate instances %s", failreason)
12568 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12571 lu.LogInfo("Instances to be moved: %s",
12572 utils.CommaJoin("%s (to %s)" %
12573 (name, _NodeEvacDest(use_nodes, group, nodes))
12574 for (name, group, nodes) in moved))
12576 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12577 map(opcodes.OpCode.LoadOpCode, ops))
12581 def _DiskSizeInBytesToMebibytes(lu, size):
12582 """Converts a disk size in bytes to mebibytes.
12584 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12587 (mib, remainder) = divmod(size, 1024 * 1024)
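# Worked example: size = 1048577 (1 MiB plus one byte) gives
# divmod(1048577, 1048576) == (1, 1); the non-zero remainder triggers the
# warning below and the size is rounded up to 2 MiB.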
12590 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12591 " to not overwrite existing data (%s bytes will not be"
12592 " wiped)", (1024 * 1024) - remainder)
12598 class LUInstanceGrowDisk(LogicalUnit):
12599 """Grow a disk of an instance.
12602 HPATH = "disk-grow"
12603 HTYPE = constants.HTYPE_INSTANCE
12606 def ExpandNames(self):
12607 self._ExpandAndLockInstance()
12608 self.needed_locks[locking.LEVEL_NODE] = []
12609 self.needed_locks[locking.LEVEL_NODE_RES] = []
12610 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12611 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12613 def DeclareLocks(self, level):
12614 if level == locking.LEVEL_NODE:
12615 self._LockInstancesNodes()
12616 elif level == locking.LEVEL_NODE_RES:
12618 self.needed_locks[locking.LEVEL_NODE_RES] = \
12619 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12621 def BuildHooksEnv(self):
12622 """Build hooks env.
12624 This runs on the master, the primary and all the secondaries.
12628 "DISK": self.op.disk,
12629 "AMOUNT": self.op.amount,
12630 "ABSOLUTE": self.op.absolute,
12632 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12635 def BuildHooksNodes(self):
12636 """Build hooks nodes.
12639 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12642 def CheckPrereq(self):
12643 """Check prerequisites.
12645 This checks that the instance is in the cluster.
12648 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12649 assert instance is not None, \
12650 "Cannot retrieve locked instance %s" % self.op.instance_name
12651 nodenames = list(instance.all_nodes)
12652 for node in nodenames:
12653 _CheckNodeOnline(self, node)
12655 self.instance = instance
12657 if instance.disk_template not in constants.DTS_GROWABLE:
12658 raise errors.OpPrereqError("Instance's disk layout does not support"
12659 " growing", errors.ECODE_INVAL)
12661 self.disk = instance.FindDisk(self.op.disk)
12663 if self.op.absolute:
12664 self.target = self.op.amount
12665 self.delta = self.target - self.disk.size
12667 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12668 "current disk size (%s)" %
12669 (utils.FormatUnit(self.target, "h"),
12670 utils.FormatUnit(self.disk.size, "h")),
12671 errors.ECODE_STATE)
12673 self.delta = self.op.amount
12674 self.target = self.disk.size + self.delta
12676 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12677 utils.FormatUnit(self.delta, "h"),
12678 errors.ECODE_INVAL)
12680 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12682 def _CheckDiskSpace(self, nodenames, req_vgspace):
12683 template = self.instance.disk_template
12684 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12685 # TODO: check the free disk space for file, when that feature will be
12687 nodes = map(self.cfg.GetNodeInfo, nodenames)
12688 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12691 # With exclusive storage we need something smarter than just looking
12692 # at free space; for now, let's simply abort the operation.
12693 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12694 " is enabled", errors.ECODE_STATE)
12695 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12697 def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")
12714 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12715 (self.op.disk, instance.name,
12716 utils.FormatUnit(self.delta, "h"),
12717 utils.FormatUnit(self.target, "h")))
12719 # First run all grow ops in dry-run mode
12720 for node in instance.all_nodes:
12721 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)
    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None
12746 # We know that (as far as we can test) operations across different
12747 # nodes will succeed, time to run it for real on the backing storage
12748 for node in instance.all_nodes:
12749 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)
12754 # And now execute it for logical storage, on the primary node
12755 node = instance.primary_node
12756 self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)
12761 disk.RecordGrow(self.delta)
12762 self.cfg.Update(instance, feedback_fn)
12764 # Changes have been recorded, release node lock
12765 _ReleaseLocks(self, locking.LEVEL_NODE)
12767 # Downgrade lock while waiting for sync
12768 self.glm.downgrade(locking.LEVEL_INSTANCE)
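    # Downgrading (rather than releasing) the instance lock still blocks
    # concurrent modifications but lets read-only operations proceed during
    # the potentially long wipe and resync phase below.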
    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")
12791 assert self.owned_locks(locking.LEVEL_NODE_RES)
12792 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12795 class LUInstanceQueryData(NoHooksLU):
12796 """Query runtime instance data.
12801 def ExpandNames(self):
12802 self.needed_locks = {}
12804 # Use locking if requested or when non-static information is wanted
12805 if not (self.op.static or self.op.use_locking):
12806 self.LogWarning("Non-static data requested, locks need to be acquired")
12807 self.op.use_locking = True
    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12828 def DeclareLocks(self, level):
12829 if self.op.use_locking:
12830 if level == locking.LEVEL_NODEGROUP:
12831 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12833 # Lock all groups used by instances optimistically; this requires going
12834 # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))
12841 elif level == locking.LEVEL_NODE:
12842 self._LockInstancesNodes()
12844 def CheckPrereq(self):
12845 """Check prerequisites.
12847 This only checks the optional instance list against the existing names.
12850 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12851 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12852 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12854 if self.wanted_names is None:
12855 assert self.op.use_locking, "Locking was not used"
12856 self.wanted_names = owned_instances
12858 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()
  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)
    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None
    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
12891 def _ComputeDiskStatus(self, instance, snode, dev):
12892 """Compute block device status.
12895 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12897 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12899 def _ComputeDiskStatusInner(self, instance, snode, dev):
12900 """Compute block device status.
12902 @attention: The device has to be annotated already.
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }
12935 def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()
12941 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12942 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12944 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12945 for node in nodes.values()))
12947 group2name_fn = lambda uuid: groups[uuid].name
12949 for instance in self.wanted_instances:
12950 pnode = nodes[instance.primary_node]
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)
12975 snodes_group_uuids = [nodes[snode_name].group
12976 for snode_name in instance.secondary_nodes]
12978 result[instance.name] = {
12979 "name": instance.name,
12980 "config_state": instance.admin_state,
12981 "run_state": remote_state,
12982 "pnode": instance.primary_node,
12983 "pnode_group_uuid": pnode.group,
12984 "pnode_group_name": group2name_fn(pnode.group),
12985 "snodes": instance.secondary_nodes,
12986 "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
12994 "network_port": instance.network_port,
12995 "hv_instance": instance.hvparams,
12996 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12997 "be_instance": instance.beparams,
12998 "be_actual": cluster.FillBE(instance),
12999 "os_instance": instance.osparams,
13000 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
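# Illustrative example (hypothetical values): with
#   mods = [(constants.DDM_ADD, -1, {"size": 1024}),
#           (constants.DDM_MODIFY, 0, {"mode": "ro"})]
# PrepareContainerMods(mods, None) returns the same tuples with None appended
# as the private field, while LUInstanceSetParams passes _InstNicModPrivate
# for NIC modifications so every entry gets its own private object.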
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
13038 def ApplyContainerMods(kind, container, chgdesc, mods,
13039 create_fn, modify_fn, remove_fn):
13040 """Applies descriptions in C{mods} to C{container}.
13043 @param kind: One-word item description
13044 @type container: list
13045 @param container: Container to modify
13046 @type chgdesc: None or list
13047 @param chgdesc: List of applied changes
13049 @param mods: Modifications as returned by L{PrepareContainerMods}
13050 @type create_fn: callable
13051 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13052 receives absolute item index, parameters and private data object as added
13053 by L{PrepareContainerMods}, returns tuple containing new item and changes
13055 @type modify_fn: callable
13056 @param modify_fn: Callback for modifying an existing item
13057 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13058 and private data object as added by L{PrepareContainerMods}, returns
13060 @type remove_fn: callable
13061 @param remove_fn: Callback on removing item; receives absolute item index,
13062 item and private data object as added by L{PrepareContainerMods}
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
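# Minimal usage sketch (callback names are placeholders, mirroring how
# LUInstanceSetParams drives this helper): given container = instance.disks,
# chgdesc = [] and mods = PrepareContainerMods(op.disks, None), calling
# ApplyContainerMods("disk", container, chgdesc, mods, create_fn, modify_fn,
# remove_fn) mutates the container in place and extends chgdesc with entries
# such as ("disk/0", "remove") or whatever the callbacks return.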
13127 def _UpdateIvNames(base_index, disks):
13128 """Updates the C{iv_name} attribute of disks.
13130 @type disks: list of L{objects.Disk}
13133 for (idx, disk) in enumerate(disks):
13134 disk.iv_name = "disk/%s" % (base_index + idx, )
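  # Example: _UpdateIvNames(0, instance.disks) renames the volumes to
  # "disk/0", "disk/1", ...; a non-zero base_index simply shifts the numbering.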
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []
      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
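  # Illustrative example (hypothetical values): the legacy two-element format
  # [(constants.DDM_ADD, {"size": 1024})] is upgraded to
  # [(constants.DDM_ADD, -1, {"size": 1024})], while a positional entry such as
  # [(0, {"mode": "ro"})] becomes [(constants.DDM_MODIFY, 0, {"mode": "ro"})].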
  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)
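  # Illustrative example (hypothetical values): a DDM_ADD entry of
  # {constants.IDISK_SIZE: "2048"} passes validation with the size normalized
  # to the integer 2048 and the mode defaulted to DISK_RDWR, whereas a
  # DDM_MODIFY entry containing IDISK_SIZE is rejected in favour of grow-disk.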
  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If network is given"
                                     " mode or link should not",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
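  # Illustrative example (hypothetical values): adding a NIC with
  # {constants.INIC_IP: constants.NIC_IP_POOL, constants.INIC_NETWORK: "net1"}
  # is accepted here (the MAC defaults to constants.VALUE_AUTO and the actual
  # address is allocated from the pool later), while ip=pool without a network
  # is refused.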
13291 def CheckArguments(self):
13292 if not (self.op.nics or self.op.disks or self.op.disk_template or
13293 self.op.hvparams or self.op.beparams or self.op.os_name or
13294 self.op.offline is not None or self.op.runtime_mem):
13295 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13297 if self.op.hvparams:
13298 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13299 "hypervisor", "instance", "cluster")
13301 self.op.disks = self._UpgradeDiskNicMods(
13302 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13303 self.op.nics = self._UpgradeDiskNicMods(
13304 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13306 if self.op.disks and self.op.disk_template is not None:
13307 raise errors.OpPrereqError("Disk template conversion and other disk"
13308 " changes not supported at the same time",
13309 errors.ECODE_INVAL)
13311 if (self.op.disk_template and
13312 self.op.disk_template in constants.DTS_INT_MIRROR and
13313 self.op.remote_node is None):
13314 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13315 " one requires specifying a secondary node",
13316 errors.ECODE_INVAL)
13318 # Check NIC modifications
13319 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13320 self._VerifyNicModification)
13322 def ExpandNames(self):
13323 self._ExpandAndLockInstance()
13324 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13325 # Can't even acquire node locks in shared mode as upcoming changes in
13326 # Ganeti 2.6 will start to modify the node object on disk conversion
13327 self.needed_locks[locking.LEVEL_NODE] = []
13328 self.needed_locks[locking.LEVEL_NODE_RES] = []
13329 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock node group to be able to look up the ipolicy
13331 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13333 def DeclareLocks(self, level):
13334 if level == locking.LEVEL_NODEGROUP:
13335 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13336 # Acquire locks for the instance's nodegroups optimistically. Needs
13337 # to be verified in CheckPrereq
13338 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13339 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13340 elif level == locking.LEVEL_NODE:
13341 self._LockInstancesNodes()
13342 if self.op.disk_template and self.op.remote_node:
13343 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13344 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13345 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13347 self.needed_locks[locking.LEVEL_NODE_RES] = \
13348 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13350 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
13358 args["minmem"] = self.be_new[constants.BE_MINMEM]
13359 if constants.BE_MAXMEM in self.be_new:
13360 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13361 if constants.BE_VCPUS in self.be_new:
13362 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13363 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13364 # information at all.
    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
13370 n = copy.deepcopy(nic)
13371 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13372 n.nicparams = nicparams
13373 nics.append(_NICToTuple(self, n))
13375 args["nics"] = nics
13377 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13378 if self.op.disk_template:
13379 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13380 if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
13392 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13393 old_params, cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
13400 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13402 new_net_uuid = None
13403 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13404 if new_net_uuid_or_name:
13405 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13406 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
    if old_net_uuid:
      old_net_obj = self.cfg.GetNetwork(old_net_uuid)

    if new_net_uuid:
      netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
      if not netparams:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" %
                                   new_net_obj.name, errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)
13421 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13423 new_filled_params = cluster.SimpleFillNIC(new_params)
13424 objects.NIC.CheckParameterSyntax(new_filled_params)
13426 new_mode = new_filled_params[constants.NIC_MODE]
13427 if new_mode == constants.NIC_MODE_BRIDGED:
13428 bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13437 elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)
13443 elif new_mode == constants.NIC_MODE_OVS:
13444 # TODO: check OVS link
13445 self.LogInfo("OVS links are currently not checked for correctness")
13447 if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
13460 except errors.ReservationError:
13461 raise errors.OpPrereqError("MAC address '%s' already in use"
13462 " in cluster" % mac,
13463 errors.ECODE_NOTUNIQUE)
    elif new_net_uuid != old_net_uuid:

      def get_net_prefix(net_uuid):
        mac_prefix = None
        if net_uuid:
          nobj = self.cfg.GetNetwork(net_uuid)
          mac_prefix = nobj.mac_prefix

        return mac_prefix

      new_prefix = get_net_prefix(new_net_uuid)
      old_prefix = get_net_prefix(old_net_uuid)
13476 if old_prefix != new_prefix:
13477 params[constants.INIC_MAC] = \
13478 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13480 #if there is a change in nic's ip/network configuration
13481 new_ip = params.get(constants.INIC_IP, old_ip)
13482 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13484 if new_ip.lower() == constants.NIC_IP_POOL:
13485 if not new_net_uuid:
13486 raise errors.OpPrereqError("ip=pool, but no network found",
13487 errors.ECODE_INVAL)
13489 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13490 except errors.ReservationError:
13491 raise errors.OpPrereqError("Unable to get a free IP"
13492 " from the address pool",
13493 errors.ECODE_STATE)
13494 self.LogInfo("Chose IP %s from network %s", new_ip, new_net_obj.name)
13495 params[constants.INIC_IP] = new_ip
13496 elif new_ip != old_ip or new_net_uuid != old_net_uuid:
13498 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13499 self.LogInfo("Reserving IP %s in network %s",
13500 new_ip, new_net_obj.name)
13501 except errors.ReservationError:
13502 raise errors.OpPrereqError("IP %s not available in network %s" %
13503 (new_ip, new_net_obj.name),
13504 errors.ECODE_NOTUNIQUE)
13507 elif not new_net_uuid and self.op.conflicts_check:
13508 _CheckForConflictingIp(self, new_ip, pnode)
      if old_ip and old_net_uuid:
        try:
          self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net_obj.name)
13517 # there are no changes in (net, ip) tuple
13518 elif (old_net_uuid is not None and
13519 (req_link is not None or req_mode is not None)):
13520 raise errors.OpPrereqError("Not allowed to change link or mode of"
13521 " a NIC that is connected to a network",
13522 errors.ECODE_INVAL)
13524 private.params = new_params
13525 private.filled = new_filled_params
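    # The validated parameters are stashed on the per-modification private
    # object so that the Exec-phase callbacks (_CreateNewNic and
    # _ApplyNicMods) can apply them without recomputing the checks above.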
13527 def _PreCheckDiskTemplate(self, pnode_info):
13528 """CheckPrereq checks related to a new disk template."""
13529 # Arguments are passed to avoid configuration lookups
13530 instance = self.instance
13531 pnode = instance.primary_node
13532 cluster = self.cluster
13533 if instance.disk_template == self.op.disk_template:
13534 raise errors.OpPrereqError("Instance already has disk template %s" %
13535 instance.disk_template, errors.ECODE_INVAL)
13537 if (instance.disk_template,
13538 self.op.disk_template) not in self._DISK_CONVERSIONS:
13539 raise errors.OpPrereqError("Unsupported disk template conversion from"
13540 " %s to %s" % (instance.disk_template,
13541 self.op.disk_template),
13542 errors.ECODE_INVAL)
13543 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13544 msg="cannot change disk template")
13545 if self.op.disk_template in constants.DTS_INT_MIRROR:
13546 if self.op.remote_node == pnode:
13547 raise errors.OpPrereqError("Given new secondary node %s is the same"
13548 " as the primary node of the instance" %
13549 self.op.remote_node, errors.ECODE_STATE)
13550 _CheckNodeOnline(self, self.op.remote_node)
13551 _CheckNodeNotDrained(self, self.op.remote_node)
13552 # FIXME: here we assume that the old instance type is DT_PLAIN
13553 assert instance.disk_template == constants.DT_PLAIN
13554 disks = [{constants.IDISK_SIZE: d.size,
13555 constants.IDISK_VG: d.logical_id[0]}
13556 for d in instance.disks]
13557 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13558 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13560 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13561 snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
13565 ignore=self.op.ignore_ipolicy)
13566 if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13573 # Make sure none of the nodes require exclusive storage
13574 nodes = [pnode_info]
13575 if self.op.disk_template in constants.DTS_INT_MIRROR:
13577 nodes.append(snode_info)
13578 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13579 if compat.any(map(has_es, nodes)):
13580 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13581 " storage is enabled" % (instance.disk_template,
13582 self.op.disk_template))
13583 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13585 def CheckPrereq(self):
13586 """Check prerequisites.
13588 This only checks the instance list against the existing names.
13591 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13592 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13594 cluster = self.cluster = self.cfg.GetClusterInfo()
13595 assert self.instance is not None, \
13596 "Cannot retrieve locked instance %s" % self.op.instance_name
13598 pnode = instance.primary_node
13599 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13600 nodelist = list(instance.all_nodes)
13601 pnode_info = self.cfg.GetNodeInfo(pnode)
13602 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13604 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13605 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13606 group_info = self.cfg.GetNodeGroup(pnode_info.group)
    # dictionary with instance information after the modification
    ispec = {}
13611 # Check disk modifications. This is done here and not in CheckArguments
13612 # (as with NICs), because we need to know the instance's disk template
13613 if instance.disk_template == constants.DT_EXT:
13614 self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)
13620 # Prepare disk/NIC modifications
13621 self.diskmod = PrepareContainerMods(self.op.disks, None)
13622 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
    # Check the validity of the `provider' parameter
    if instance.disk_template in constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and parameter"
                                       " '%s' missing, during disk add" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)
13652 if self.op.os_name and not self.op.force:
13653 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13654 self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os
13659 assert not (self.op.disk_template and self.op.disks), \
13660 "Can't modify disk template and apply disk changes at the same time"
13662 if self.op.disk_template:
13663 self._PreCheckDiskTemplate(pnode_info)
13665 # hvparams processing
13666 if self.op.hvparams:
13667 hv_type = instance.hypervisor
13668 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13669 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13670 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13673 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13674 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13675 self.hv_proposed = self.hv_new = hv_new # the new actual values
13676 self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}
13682 # beparams processing
13683 if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
13687 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13688 be_new = cluster.SimpleFillBE(i_bedict)
13689 self.be_proposed = self.be_new = be_new # the new actual values
13690 self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
13693 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13694 be_old = cluster.FillBE(instance)
13696 # CPU param validation -- checking every time a parameter is
13697 # changed to cover all cases where either CPU mask or vcpus have
13699 if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13703 # Verify mask is consistent with number of vCPUs. Can skip this
13704 # test if only 1 entry in the CPU mask, which means same mask
13705 # is applied to all vCPUs.
13706 if (len(cpu_list) > 1 and
13707 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)
13714 # Only perform this test if a new CPU mask is given
13715 if constants.HV_CPU_MASK in self.hv_new:
13716 # Calculate the largest CPU number requested
13717 max_requested_cpu = max(map(max, cpu_list))
13718 # Check that all of the instance's nodes have enough physical CPUs to
13719 # satisfy the requested CPU mask
13720 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13721 max_requested_cpu + 1, instance.hypervisor)
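      # Illustrative example (hypothetical values): a mask of "0-1:2-3:4" pins
      # three vCPUs individually and therefore requires BE_VCPUS == 3; a
      # single-entry mask such as "0-3" applies to every vCPU and skips the
      # count check, while the physical-CPU check above still requires CPUs
      # 0-3 to exist on all of the instance's nodes.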
13723 # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []
13733 #TODO(dynmem): do the appropriate check involving MINMEM
13734 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13735 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13736 mem_check_list = [pnode]
13737 if be_new[constants.BE_AUTO_BALANCE]:
13738 # either we changed auto_balance to yes or it was from before
13739 mem_check_list.extend(instance.secondary_nodes)
13740 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13741 instance.hypervisor)
13742 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13743 [instance.hypervisor], False)
13744 pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)
13776 if be_new[constants.BE_AUTO_BALANCE]:
13777 for node, nres in nodeinfo.items():
        if node not in instance.secondary_nodes:
          continue
        nres.Raise("Can't get info from secondary node %s" % node,
13781 prereq=True, ecode=errors.ECODE_STATE)
13782 (_, _, (nhvinfo, )) = nres.payload
13783 if not isinstance(nhvinfo.get("memory_free", None), int):
13784 raise errors.OpPrereqError("Secondary node %s didn't return free"
13785 " memory information" % node,
13786 errors.ECODE_STATE)
13787 #TODO(dynmem): do the appropriate check involving MINMEM
13788 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13789 raise errors.OpPrereqError("This change will prevent the instance"
13790 " from failover to its secondary node"
13791 " %s, due to not enough memory" % node,
13792 errors.ECODE_STATE)
13794 if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
13798 remote_info.Raise("Error checking node %s" % instance.primary_node)
13799 if not remote_info.payload: # not running already
13800 raise errors.OpPrereqError("Instance %s is not running" %
13801 instance.name, errors.ECODE_STATE)
13803 current_memory = remote_info.payload["memory"]
13804 if (not self.op.force and
13805 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13806 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13807 raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " specified" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
13812 self.be_proposed[constants.BE_MAXMEM]),
13813 errors.ECODE_INVAL)
      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
13818 "ballooning memory for instance %s" %
13819 instance.name, delta, instance.hypervisor)
13821 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13822 raise errors.OpPrereqError("Disk operations not supported for"
13823 " diskless instances", errors.ECODE_INVAL)
13825 def _PrepareNicCreate(_, params, private):
13826 self._PrepareNicModification(params, private, None, None,
13827 {}, cluster, pnode)
13828 return (None, None)
13830 def _PrepareNicMod(_, nic, params, private):
13831 self._PrepareNicModification(params, private, nic.ip, nic.network,
13832 nic.nicparams, cluster, pnode)
    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
13838 if net is not None and ip is not None:
13839 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13841 # Verify NIC changes (operating on copy)
13842 nics = instance.nics[:]
13843 ApplyContainerMods("NIC", nics, None, self.nicmod,
13844 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13845 if len(nics) > constants.MAX_NICS:
13846 raise errors.OpPrereqError("Instance has too many network interfaces"
13847 " (%d), cannot add more" % constants.MAX_NICS,
13848 errors.ECODE_STATE)
13850 # Verify disk changes (operating on a copy)
13851 disks = instance.disks[:]
13852 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13853 if len(disks) > constants.MAX_DISKS:
13854 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13855 " more" % constants.MAX_DISKS,
13856 errors.ECODE_STATE)
13857 disk_sizes = [disk.size for disk in instance.disks]
13858 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13859 self.diskmod if op == constants.DDM_ADD)
13860 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13861 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13863 if self.op.offline is not None and self.op.offline:
13864 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13865 msg="can't change to offline")
13867 # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13880 if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
13885 ispec[constants.ISPEC_SPINDLE_USE] = \
13886 self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
13891 ispec_max = ispec.copy()
13892 ispec_max[constants.ISPEC_MEM_SIZE] = \
13893 self.be_new.get(constants.BE_MAXMEM, None)
13894 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13895 ispec_min = ispec.copy()
13896 ispec_min[constants.ISPEC_MEM_SIZE] = \
13897 self.be_new.get(constants.BE_MINMEM, None)
13898 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13900 if (res_max or res_min):
13901 # FIXME: Improve error message by including information about whether
13902 # the upper or lower limit of the parameter fails the ipolicy.
13903 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13904 (group_info, group_info.name,
13905 utils.CommaJoin(set(res_max + res_min))))
13906 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13908 def _ConvertPlainToDrbd(self, feedback_fn):
13909 """Converts an instance from plain to drbd.
13912 feedback_fn("Converting template to drbd")
13913 instance = self.instance
13914 pnode = instance.primary_node
13915 snode = self.op.remote_node
13917 assert instance.disk_template == constants.DT_PLAIN
13919 # create a fake disk info for _GenerateDiskTemplate
13920 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13921 constants.IDISK_VG: d.logical_id[0]}
13922 for d in instance.disks]
13923 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13924 instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
13929 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13930 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13931 info = _GetInstanceInfoText(instance)
13932 feedback_fn("Creating additional volumes...")
13933 # first, create the missing data and meta devices
13934 for disk in anno_disks:
13935 # unfortunately this is... not too nice
13936 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13937 info, True, p_excl_stor)
13938 for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
13944 rename_list = [(o, n.children[0].logical_id)
13945 for (o, n) in zip(instance.disks, new_disks)]
13946 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13947 result.Raise("Failed to rename original LVs")
13949 feedback_fn("Initializing DRBD devices...")
13950 # all child devices are in place, we can now create the DRBD devices
13951 for disk in anno_disks:
13952 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13953 f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)
13957 # at this point, the instance has been modified
13958 instance.disk_template = constants.DT_DRBD8
13959 instance.disks = new_disks
13960 self.cfg.Update(instance, feedback_fn)
13962 # Release node locks while waiting for sync
13963 _ReleaseLocks(self, locking.LEVEL_NODE)
13965 # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")
13972 # Node resource locks will be released by caller
13974 def _ConvertDrbdToPlain(self, feedback_fn):
13975 """Converts an instance from drbd to plain.
13978 instance = self.instance
13980 assert len(instance.secondary_nodes) == 1
13981 assert instance.disk_template == constants.DT_DRBD8
13983 pnode = instance.primary_node
13984 snode = instance.secondary_nodes[0]
13985 feedback_fn("Converting template to plain")
13987 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13988 new_disks = [d.children[0] for d in instance.disks]
13990 # copy over size and mode
13991 for parent, child in zip(old_disks, new_disks):
13992 child.size = parent.size
13993 child.mode = parent.mode
13995 # this is a DRBD disk, return its port to the pool
13996 # NOTE: this must be done right before the call to cfg.Update!
13997 for disk in old_disks:
13998 tcp_port = disk.logical_id[2]
13999 self.cfg.AddTcpUdpPort(tcp_port)
14001 # update instance structure
14002 instance.disks = new_disks
14003 instance.disk_template = constants.DT_PLAIN
14004 self.cfg.Update(instance, feedback_fn)
14006 # Release locks in case removing disks takes a while
14007 _ReleaseLocks(self, locking.LEVEL_NODE)
14009 feedback_fn("Removing volumes on the secondary node...")
14010 for disk in old_disks:
14011 self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)
14017 feedback_fn("Removing unneeded volumes on the primary node...")
14018 for idx, disk in enumerate(old_disks):
14019 meta = disk.children[1]
14020 self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
14026 def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])
  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]
    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]
  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)
14086 # if this is a DRBD disk, return its port to the pool
14087 if root.dev_type in constants.LDS_DRBD:
14088 self.cfg.AddTcpUdpPort(root.logical_id[2])
14091 def _CreateNewNic(idx, params, private):
14092 """Creates data structure for a new network interface.
14095 mac = params[constants.INIC_MAC]
14096 ip = params.get(constants.INIC_IP, None)
14097 net = params.get(constants.INIC_NETWORK, None)
14098 #TODO: not private.filled?? can a nic have no nicparams??
14099 nicparams = private.filled
    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
       private.filled[constants.NIC_LINK],
       net)),
      ])
  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled
      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes
14129 def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
14135 # Process here the warnings from CheckPrereq, as we don't have a
14136 # feedback_fn there.
14137 # TODO: Replace with self.LogWarning
14138 for warn in self.warn:
14139 feedback_fn("WARNING: %s" % warn)
14141 assert ((self.op.disk_template is None) ^
14142 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14143 "Not owning any node resource locks"
    result = []
    instance = self.instance
14149 if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
14153 rpcres.Raise("Cannot modify instance runtime memory")
14154 result.append(("runtime_memory", self.op.runtime_mem))
14156 # Apply disk changes
14157 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14158 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14159 _UpdateIvNames(0, instance.disks)
14161 if self.op.disk_template:
14163 check_nodes = set(instance.all_nodes)
14164 if self.op.remote_node:
14165 check_nodes.add(self.op.remote_node)
14166 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14167 owned = self.owned_locks(level)
14168 assert not (check_nodes - owned), \
14169 ("Not owning the correct locks, owning %r, expected at least %r" %
14170 (owned, check_nodes))
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))
14184 assert instance.disk_template == self.op.disk_template, \
14185 ("Expected disk template '%s', found '%s'" %
14186 (self.op.disk_template, instance.disk_template))
14188 # Release node and resource locks if there are any (they might already have
14189 # been released during disk conversion)
14190 _ReleaseLocks(self, locking.LEVEL_NODE)
14191 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14193 # Apply NIC changes
14194 if self._new_nics is not None:
14195 instance.nics = self._new_nics
14196 result.extend(self._nic_chgdesc)
14199 if self.op.hvparams:
14200 instance.hvparams = self.hv_inst
14201 for key, val in self.op.hvparams.iteritems():
14202 result.append(("hv/%s" % key, val))
14205 if self.op.beparams:
14206 instance.beparams = self.be_inst
14207 for key, val in self.op.beparams.iteritems():
14208 result.append(("be/%s" % key, val))
14211 if self.op.os_name:
14212 instance.os = self.op.os_name
14215 if self.op.osparams:
14216 instance.osparams = self.os_inst
14217 for key, val in self.op.osparams.iteritems():
14218 result.append(("os/%s" % key, val))
    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
14224 # Mark instance as offline
14225 self.cfg.MarkInstanceOffline(instance.name)
14226 result.append(("admin_state", constants.ADMINST_OFFLINE))
14228 # Mark instance as online, but stopped
14229 self.cfg.MarkInstanceDown(instance.name)
14230 result.append(("admin_state", constants.ADMINST_DOWN))
14232 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14234 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14235 self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
14240 _DISK_CONVERSIONS = {
14241 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
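  # Only the plain <-> drbd8 conversions listed above are supported; this
  # mapping must stay in sync with _PreCheckDiskTemplate, which rejects any
  # (old, new) template pair that is not a key of this dictionary.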
14246 class LUInstanceChangeGroup(LogicalUnit):
14247 HPATH = "instance-change-group"
14248 HTYPE = constants.HTYPE_INSTANCE
14251 def ExpandNames(self):
14252 self.share_locks = _ShareAll()
14254 self.needed_locks = {
14255 locking.LEVEL_NODEGROUP: [],
14256 locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None
14268 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14270 def DeclareLocks(self, level):
14271 if level == locking.LEVEL_NODEGROUP:
14272 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14274 if self.req_target_uuids:
14275 lock_groups = set(self.req_target_uuids)
14277 # Lock all groups used by instance optimistically; this requires going
14278 # via the node before it's locked, requiring verification later on
14279 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET
14285 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14287 elif level == locking.LEVEL_NODE:
14288 if self.req_target_uuids:
14289 # Lock all nodes used by instances
14290 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14291 self._LockInstancesNodes()
14293 # Lock all nodes in all potential target groups
14294 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14295 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14296 member_nodes = [node_name
14297 for group in lock_groups
14298 for node_name in self.cfg.GetNodeGroup(group).members]
14299 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14304 def CheckPrereq(self):
14305 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14306 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14307 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14309 assert (self.req_target_uuids is None or
14310 owned_groups.issuperset(self.req_target_uuids))
14311 assert owned_instances == set([self.op.instance_name])
14313 # Get instance information
14314 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14316 # Check if node groups for locked instance are still correct
14317 assert owned_nodes.issuperset(self.instance.all_nodes), \
14318 ("Instance %s's nodes changed while we kept the lock" %
14319 self.op.instance_name)
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)
14324 if self.req_target_uuids:
14325 # User requested specific target groups
14326 self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups
14331 conflicting_groups = self.target_uuids & inst_groups
14332 if conflicting_groups:
14333 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14334 " used by the instance '%s'" %
14335 (utils.CommaJoin(conflicting_groups),
14336 self.op.instance_name),
14337 errors.ECODE_INVAL)
14339 if not self.target_uuids:
14340 raise errors.OpPrereqError("There are no possible target groups",
14341 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env
14357 def BuildHooksNodes(self):
14358 """Build hooks nodes.
14361 mn = self.cfg.GetMasterNode()
14362 return ([mn], [mn])
14364 def Exec(self, feedback_fn):
14365 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14367 assert instances == [self.op.instance_name], "Instance not locked"
14369 req = iallocator.IAReqGroupChange(instances=instances,
14370 target_groups=list(self.target_uuids))
14371 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14373 ial.Run(self.op.iallocator)
14375 if not ial.success:
14376 raise errors.OpPrereqError("Can't compute solution for changing group of"
14377 " instance '%s' using iallocator '%s': %s" %
14378 (self.op.instance_name, self.op.iallocator,
14379 ial.info), errors.ECODE_NORES)
14381 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14383 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14384 " instance '%s'", len(jobs), self.op.instance_name)
14386 return ResultWithJobs(jobs)
14389 class LUBackupQuery(NoHooksLU):
14390 """Query the exports list
14395 def CheckArguments(self):
14396 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14397 ["node", "export"], self.op.use_locking)
14399 def ExpandNames(self):
14400 self.expq.ExpandNames(self)
14402 def DeclareLocks(self, level):
14403 self.expq.DeclareLocks(self, level)
14405 def Exec(self, feedback_fn):
14408 for (node, expname) in self.expq.OldStyleQuery(self):
14409 if expname is None:
14410 result[node] = False
14412 result.setdefault(node, []).append(expname)
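# Illustrative shape of the old-style result built above (added; node and
# export names are hypothetical): a dict mapping each node name either to
# False (the export list could not be retrieved from that node) or to the
# list of export names found on it, e.g.
#   {"node1.example.com": ["inst1.export"], "node2.example.com": False}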
14417 class _ExportQuery(_QueryBase):
14418 FIELDS = query.EXPORT_FIELDS
14420 #: The node name is not a unique key for this query
14421 SORT_FIELD = "node"
14423 def ExpandNames(self, lu):
14424 lu.needed_locks = {}
14426 # The following variables interact with _QueryBase._GetNames
14428 self.wanted = _GetWantedNodes(lu, self.names)
14430 self.wanted = locking.ALL_SET
14432 self.do_locking = self.use_locking
14434 if self.do_locking:
14435 lu.share_locks = _ShareAll()
14436 lu.needed_locks = {
14437 locking.LEVEL_NODE: self.wanted,
14441 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14443 def DeclareLocks(self, lu, level):
14446 def _GetQueryData(self, lu):
14447 """Computes the list of exports per node and their attributes.
14450 # Locking is not used
14452 assert not (compat.any(lu.glm.is_owned(level)
14453 for level in locking.LEVELS
14454 if level != locking.LEVEL_CLUSTER) or
14455 self.do_locking or self.use_locking)
14457 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14461 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14463 result.append((node, None))
14465 result.extend((node, expname) for expname in nres.payload)
14470 class LUBackupPrepare(NoHooksLU):
14471 """Prepares an instance for an export and returns useful information.
14476 def ExpandNames(self):
14477 self._ExpandAndLockInstance()
14479 def CheckPrereq(self):
14480 """Check prerequisites.
14483 instance_name = self.op.instance_name
14485 self.instance = self.cfg.GetInstanceInfo(instance_name)
14486 assert self.instance is not None, \
14487 "Cannot retrieve locked instance %s" % self.op.instance_name
14488 _CheckNodeOnline(self, self.instance.primary_node)
14490 self._cds = _GetClusterDomainSecret()
14492 def Exec(self, feedback_fn):
14493 """Prepares an instance for an export.
14496 instance = self.instance
14498 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14499 salt = utils.GenerateSecret(8)
14501 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14502 result = self.rpc.call_x509_cert_create(instance.primary_node,
14503 constants.RIE_CERT_VALIDITY)
14504 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14506 (name, cert_pem) = result.payload
14508 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14512 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14513 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14515 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
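# Descriptive note (added): the dictionary assembled above is what the remote
# import side needs in order to verify this export - the cluster domain
# secret handshake, the HMAC-signed X509 key name and the signed CA
# certificate.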
14521 class LUBackupExport(LogicalUnit):
14522 """Export an instance to an image in the cluster.
14525 HPATH = "instance-export"
14526 HTYPE = constants.HTYPE_INSTANCE
14529 def CheckArguments(self):
14530 """Check the arguments.
14533 self.x509_key_name = self.op.x509_key_name
14534 self.dest_x509_ca_pem = self.op.destination_x509_ca
14536 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14537 if not self.x509_key_name:
14538 raise errors.OpPrereqError("Missing X509 key name for encryption",
14539 errors.ECODE_INVAL)
14541 if not self.dest_x509_ca_pem:
14542 raise errors.OpPrereqError("Missing destination X509 CA",
14543 errors.ECODE_INVAL)
14545 def ExpandNames(self):
14546 self._ExpandAndLockInstance()
14548 # Lock all nodes for local exports
14549 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14550 # FIXME: lock only instance primary and destination node
14552 # Sad but true, for now we have to lock all nodes, as we don't know where
14553 # the previous export might be, and in this LU we search for it and
14554 # remove it from its current node. In the future we could fix this by:
14555 # - making a tasklet to search (share-lock all), then create the new
14556 # export, then another tasklet to remove the old one afterwards
14557 # - removing the removal operation altogether
14558 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
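# Note (added): locking.ALL_SET acquires every node lock in the cluster, so a
# local export currently serializes against any other operation that needs
# node locks; the FIXME above is about narrowing this to just the primary and
# destination node.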
14560 # Allocations should be stopped while this LU runs with node locks, but
14561 # it doesn't have to be exclusive
14562 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14563 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14565 def DeclareLocks(self, level):
14566 """Last minute lock declaration."""
14567 # All nodes are locked anyway, so nothing to do here.
14569 def BuildHooksEnv(self):
14570 """Build hooks env.
14572 This will run on the master, primary node and target node.
14576 "EXPORT_MODE": self.op.mode,
14577 "EXPORT_NODE": self.op.target_node,
14578 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14579 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14580 # TODO: Generic function for boolean env variables
14581 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14584 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14588 def BuildHooksNodes(self):
14589 """Build hooks nodes.
14592 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14594 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14595 nl.append(self.op.target_node)
14599 def CheckPrereq(self):
14600 """Check prerequisites.
14602 This checks that the instance and node names are valid.
14605 instance_name = self.op.instance_name
14607 self.instance = self.cfg.GetInstanceInfo(instance_name)
14608 assert self.instance is not None, \
14609 "Cannot retrieve locked instance %s" % self.op.instance_name
14610 _CheckNodeOnline(self, self.instance.primary_node)
14612 if (self.op.remove_instance and
14613 self.instance.admin_state == constants.ADMINST_UP and
14614 not self.op.shutdown):
14615 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14616 " down first", errors.ECODE_STATE)
14618 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14619 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14620 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14621 assert self.dst_node is not None
14623 _CheckNodeOnline(self, self.dst_node.name)
14624 _CheckNodeNotDrained(self, self.dst_node.name)
14627 self.dest_disk_info = None
14628 self.dest_x509_ca = None
14630 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14631 self.dst_node = None
14633 if len(self.op.target_node) != len(self.instance.disks):
14634 raise errors.OpPrereqError(("Received destination information for %s"
14635 " disks, but instance %s has %s disks") %
14636 (len(self.op.target_node), instance_name,
14637 len(self.instance.disks)),
14638 errors.ECODE_INVAL)
14640 cds = _GetClusterDomainSecret()
14642 # Check X509 key name
14644 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14645 except (TypeError, ValueError), err:
14646 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14647 errors.ECODE_INVAL)
14649 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14650 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14651 errors.ECODE_INVAL)
14653 # Load and verify CA
14655 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14656 except OpenSSL.crypto.Error, err:
14657 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14658 (err, ), errors.ECODE_INVAL)
14660 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14661 if errcode is not None:
14662 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14663 (msg, ), errors.ECODE_INVAL)
14665 self.dest_x509_ca = cert
14667 # Verify target information
14669 for idx, disk_data in enumerate(self.op.target_node):
14671 (host, port, magic) = \
14672 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14673 except errors.GenericError, err:
14674 raise errors.OpPrereqError("Target info for disk %s: %s" %
14675 (idx, err), errors.ECODE_INVAL)
14677 disk_info.append((host, port, magic))
14679 assert len(disk_info) == len(self.op.target_node)
14680 self.dest_disk_info = disk_info
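# Descriptive note (added): dest_disk_info is a list of (host, port, magic)
# tuples, one per instance disk and in disk-index order, as verified by
# CheckRemoteExportDiskInfo above; Exec() later passes it to the remote
# export helper.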
14683 raise errors.ProgrammerError("Unhandled export mode %r" %
14686 # instance disk type verification
14687 # TODO: Implement export support for file-based disks
14688 for disk in self.instance.disks:
14689 if disk.dev_type == constants.LD_FILE:
14690 raise errors.OpPrereqError("Export not supported for instances with"
14691 " file-based disks", errors.ECODE_INVAL)
14693 def _CleanupExports(self, feedback_fn):
14694 """Removes exports of current instance from all other nodes.
14696 If an instance in a cluster with nodes A..D was exported to node C, its
14697 exports will be removed from the nodes A, B and D.
14700 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14702 nodelist = self.cfg.GetNodeList()
14703 nodelist.remove(self.dst_node.name)
14705 # on one-node clusters nodelist will be empty after the removal
14706 # if we proceeded, the backup would be removed, because OpBackupQuery
14707 # substitutes an empty list with the full cluster node list.
14708 iname = self.instance.name
14710 feedback_fn("Removing old exports for instance %s" % iname)
14711 exportlist = self.rpc.call_export_list(nodelist)
14712 for node in exportlist:
14713 if exportlist[node].fail_msg:
14715 if iname in exportlist[node].payload:
14716 msg = self.rpc.call_export_remove(node, iname).fail_msg
14718 self.LogWarning("Could not remove older export for instance %s"
14719 " on node %s: %s", iname, node, msg)
14721 def Exec(self, feedback_fn):
14722 """Export an instance to an image in the cluster.
14725 assert self.op.mode in constants.EXPORT_MODES
14727 instance = self.instance
14728 src_node = instance.primary_node
14730 if self.op.shutdown:
14731 # shutdown the instance, but not the disks
14732 feedback_fn("Shutting down instance %s" % instance.name)
14733 result = self.rpc.call_instance_shutdown(src_node, instance,
14734 self.op.shutdown_timeout)
14735 # TODO: Maybe ignore failures if ignore_remove_failures is set
14736 result.Raise("Could not shut down instance %s on"
14737 " node %s" % (instance.name, src_node))
14739 # set the disks ID correctly since call_instance_start needs the
14740 # correct drbd minor to create the symlinks
14741 for disk in instance.disks:
14742 self.cfg.SetDiskID(disk, src_node)
14744 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14747 # Activate the instance disks if we're exporting a stopped instance
14748 feedback_fn("Activating disks for %s" % instance.name)
14749 _StartInstanceDisks(self, instance, None)
14752 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14755 helper.CreateSnapshots()
14757 if (self.op.shutdown and
14758 instance.admin_state == constants.ADMINST_UP and
14759 not self.op.remove_instance):
14760 assert not activate_disks
14761 feedback_fn("Starting instance %s" % instance.name)
14762 result = self.rpc.call_instance_start(src_node,
14763 (instance, None, None), False)
14764 msg = result.fail_msg
14766 feedback_fn("Failed to start instance: %s" % msg)
14767 _ShutdownInstanceDisks(self, instance)
14768 raise errors.OpExecError("Could not start instance: %s" % msg)
14770 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14771 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14772 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14773 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14774 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14776 (key_name, _, _) = self.x509_key_name
14779 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14782 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14783 key_name, dest_ca_pem,
14788 # Check for backwards compatibility
14789 assert len(dresults) == len(instance.disks)
14790 assert compat.all(isinstance(i, bool) for i in dresults), \
14791 "Not all results are boolean: %r" % dresults
14795 feedback_fn("Deactivating disks for %s" % instance.name)
14796 _ShutdownInstanceDisks(self, instance)
14798 if not (compat.all(dresults) and fin_resu):
14801 failures.append("export finalization")
14802 if not compat.all(dresults):
14803 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14805 failures.append("disk export: disk(s) %s" % fdsk)
14807 raise errors.OpExecError("Export failed, errors in %s" %
14808 utils.CommaJoin(failures))
14810 # At this point, the export was successful, we can cleanup/finish
14812 # Remove instance if requested
14813 if self.op.remove_instance:
14814 feedback_fn("Removing instance %s" % instance.name)
14815 _RemoveInstance(self, feedback_fn, instance,
14816 self.op.ignore_remove_failures)
14818 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14819 self._CleanupExports(feedback_fn)
14821 return fin_resu, dresults
14824 class LUBackupRemove(NoHooksLU):
14825 """Remove exports related to the named instance.
14830 def ExpandNames(self):
14831 self.needed_locks = {
14832 # We need all nodes to be locked in order for RemoveExport to work, but
14833 # we don't need to lock the instance itself, as nothing will happen to it
14834 # (and we can remove exports also for a removed instance)
14835 locking.LEVEL_NODE: locking.ALL_SET,
14837 # Removing backups is quick, so blocking allocations is justified
14838 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14841 # Allocations should be stopped while this LU runs with node locks, but it
14842 # doesn't have to be exclusive
14843 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14845 def Exec(self, feedback_fn):
14846 """Remove any export.
14849 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14850 # If the instance was not found we'll try with the name that was passed in.
14851 # This will only work if it was an FQDN, though.
14853 if not instance_name:
14855 instance_name = self.op.instance_name
14857 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14858 exportlist = self.rpc.call_export_list(locked_nodes)
14860 for node in exportlist:
14861 msg = exportlist[node].fail_msg
14863 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14865 if instance_name in exportlist[node].payload:
14867 result = self.rpc.call_export_remove(node, instance_name)
14868 msg = result.fail_msg
14870 logging.error("Could not remove export for instance %s"
14871 " on node %s: %s", instance_name, node, msg)
14873 if fqdn_warn and not found:
14874 feedback_fn("Export not found. If trying to remove an export belonging"
14875 " to a deleted instance please use its Fully Qualified"
14879 class LUGroupAdd(LogicalUnit):
14880 """Logical unit for creating node groups.
14883 HPATH = "group-add"
14884 HTYPE = constants.HTYPE_GROUP
14887 def ExpandNames(self):
14888 # We need the new group's UUID here so that we can create and acquire the
14889 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14890 # that it should not check whether the UUID exists in the configuration.
14891 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14892 self.needed_locks = {}
14893 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
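# Note (added; an interpretation of the add_locks/remove_locks mechanism, not
# original commentary): registering the freshly generated UUID in add_locks
# makes the processor create and acquire a node-group lock for a group that
# does not yet exist in the configuration; Exec() below then deletes the
# matching remove_locks entry so the new lock is kept once the group has
# actually been added.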
14895 def CheckPrereq(self):
14896 """Check prerequisites.
14898 This checks that the given group name is not an existing node group
14903 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14904 except errors.OpPrereqError:
14907 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14908 " node group (UUID: %s)" %
14909 (self.op.group_name, existing_uuid),
14910 errors.ECODE_EXISTS)
14912 if self.op.ndparams:
14913 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14915 if self.op.hv_state:
14916 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14918 self.new_hv_state = None
14920 if self.op.disk_state:
14921 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14923 self.new_disk_state = None
14925 if self.op.diskparams:
14926 for templ in constants.DISK_TEMPLATES:
14927 if templ in self.op.diskparams:
14928 utils.ForceDictType(self.op.diskparams[templ],
14929 constants.DISK_DT_TYPES)
14930 self.new_diskparams = self.op.diskparams
14932 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14933 except errors.OpPrereqError, err:
14934 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14935 errors.ECODE_INVAL)
14937 self.new_diskparams = {}
14939 if self.op.ipolicy:
14940 cluster = self.cfg.GetClusterInfo()
14941 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14943 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14944 except errors.ConfigurationError, err:
14945 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14946 errors.ECODE_INVAL)
14948 def BuildHooksEnv(self):
14949 """Build hooks env.
14953 "GROUP_NAME": self.op.group_name,
14956 def BuildHooksNodes(self):
14957 """Build hooks nodes.
14960 mn = self.cfg.GetMasterNode()
14961 return ([mn], [mn])
14963 def Exec(self, feedback_fn):
14964 """Add the node group to the cluster.
14967 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14968 uuid=self.group_uuid,
14969 alloc_policy=self.op.alloc_policy,
14970 ndparams=self.op.ndparams,
14971 diskparams=self.new_diskparams,
14972 ipolicy=self.op.ipolicy,
14973 hv_state_static=self.new_hv_state,
14974 disk_state_static=self.new_disk_state)
14976 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14977 del self.remove_locks[locking.LEVEL_NODEGROUP]
14980 class LUGroupAssignNodes(NoHooksLU):
14981 """Logical unit for assigning nodes to groups.
14986 def ExpandNames(self):
14987 # These raise errors.OpPrereqError on their own:
14988 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14989 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14991 # We want to lock all the affected nodes and groups. We have readily
14992 # available the list of nodes, and the *destination* group. To gather the
14993 # list of "source" groups, we need to fetch node information later on.
14994 self.needed_locks = {
14995 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14996 locking.LEVEL_NODE: self.op.nodes,
14999 def DeclareLocks(self, level):
15000 if level == locking.LEVEL_NODEGROUP:
15001 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15003 # Try to get all affected nodes' groups without having the group or node
15004 # lock yet. Needs verification later in the code flow.
15005 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15007 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15009 def CheckPrereq(self):
15010 """Check prerequisites.
15013 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15014 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15015 frozenset(self.op.nodes))
15017 expected_locks = (set([self.group_uuid]) |
15018 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15019 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15020 if actual_locks != expected_locks:
15021 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15022 " current groups are '%s', used to be '%s'" %
15023 (utils.CommaJoin(expected_locks),
15024 utils.CommaJoin(actual_locks)))
15026 self.node_data = self.cfg.GetAllNodesInfo()
15027 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15028 instance_data = self.cfg.GetAllInstancesInfo()
15030 if self.group is None:
15031 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15032 (self.op.group_name, self.group_uuid))
15034 (new_splits, previous_splits) = \
15035 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15036 for node in self.op.nodes],
15037 self.node_data, instance_data)
15040 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15042 if not self.op.force:
15043 raise errors.OpExecError("The following instances get split by this"
15044 " change and --force was not given: %s" %
15047 self.LogWarning("This operation will split the following instances: %s",
15050 if previous_splits:
15051 self.LogWarning("In addition, these already-split instances continue"
15052 " to be split across groups: %s",
15053 utils.CommaJoin(utils.NiceSort(previous_splits)))
15055 def Exec(self, feedback_fn):
15056 """Assign nodes to a new group.
15059 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15061 self.cfg.AssignGroupNodes(mods)
15064 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15065 """Check for split instances after a node assignment.
15067 This method considers a series of node assignments as an atomic operation,
15068 and returns information about split instances after applying the set of
15071 In particular, it returns information about newly split instances, and
15072 instances that were already split, and remain so after the change.
15074 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15077 @type changes: list of (node_name, new_group_uuid) pairs.
15078 @param changes: list of node assignments to consider.
15079 @param node_data: a dict with data for all nodes
15080 @param instance_data: a dict with all instances to consider
15081 @rtype: a two-tuple
15082 @return: a list of instances that were previously okay and become split as a
15083 consequence of this change, and a list of instances that were previously
15084 split and that this change does not fix.
15087 changed_nodes = dict((node, group) for node, group in changes
15088 if node_data[node].group != group)
15090 all_split_instances = set()
15091 previously_split_instances = set()
15093 def InstanceNodes(instance):
15094 return [instance.primary_node] + list(instance.secondary_nodes)
15096 for inst in instance_data.values():
15097 if inst.disk_template not in constants.DTS_INT_MIRROR:
15100 instance_nodes = InstanceNodes(inst)
15102 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15103 previously_split_instances.add(inst.name)
15105 if len(set(changed_nodes.get(node, node_data[node].group)
15106 for node in instance_nodes)) > 1:
15107 all_split_instances.add(inst.name)
15109 return (list(all_split_instances - previously_split_instances),
15110 list(previously_split_instances & all_split_instances))
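# Illustrative example (added; node, group and instance names are
# hypothetical): say nodes "n1" and "n2" are in group "g1", node "n3" is in
# group "g2", DRBD instance "inst1" lives on n1/n2 and DRBD instance "inst2"
# lives on n1/n3 (so it is already split).  Then
#   CheckAssignmentForSplitInstances([("n2", "g2")], node_data, instance_data)
# would return (["inst1"], ["inst2"]): moving n2 into g2 newly splits inst1,
# while inst2 remains split after the change.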
15113 class _GroupQuery(_QueryBase):
15114 FIELDS = query.GROUP_FIELDS
15116 def ExpandNames(self, lu):
15117 lu.needed_locks = {}
15119 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15120 self._cluster = lu.cfg.GetClusterInfo()
15121 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15124 self.wanted = [name_to_uuid[name]
15125 for name in utils.NiceSort(name_to_uuid.keys())]
15127 # Accept the given names as either group names or UUIDs.
15130 all_uuid = frozenset(self._all_groups.keys())
15132 for name in self.names:
15133 if name in all_uuid:
15134 self.wanted.append(name)
15135 elif name in name_to_uuid:
15136 self.wanted.append(name_to_uuid[name])
15138 missing.append(name)
15141 raise errors.OpPrereqError("Some groups do not exist: %s" %
15142 utils.CommaJoin(missing),
15143 errors.ECODE_NOENT)
15145 def DeclareLocks(self, lu, level):
15148 def _GetQueryData(self, lu):
15149 """Computes the list of node groups and their attributes.
15152 do_nodes = query.GQ_NODE in self.requested_data
15153 do_instances = query.GQ_INST in self.requested_data
15155 group_to_nodes = None
15156 group_to_instances = None
15158 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15159 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15160 # latter GetAllInstancesInfo() is not enough, for we have to go through
15161 # instance->node. Hence, we will need to process nodes even if we only need
15162 # instance information.
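# Illustrative shape (added): after the loops below, group_to_nodes maps each
# requested group UUID to a list of its node names, and group_to_instances
# maps each group UUID to the names of instances whose primary node is in
# that group, e.g. {"uuid-1": ["inst1", "inst2"]}.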
15163 if do_nodes or do_instances:
15164 all_nodes = lu.cfg.GetAllNodesInfo()
15165 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15168 for node in all_nodes.values():
15169 if node.group in group_to_nodes:
15170 group_to_nodes[node.group].append(node.name)
15171 node_to_group[node.name] = node.group
15174 all_instances = lu.cfg.GetAllInstancesInfo()
15175 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15177 for instance in all_instances.values():
15178 node = instance.primary_node
15179 if node in node_to_group:
15180 group_to_instances[node_to_group[node]].append(instance.name)
15183 # Do not pass on node information if it was not requested.
15184 group_to_nodes = None
15186 return query.GroupQueryData(self._cluster,
15187 [self._all_groups[uuid]
15188 for uuid in self.wanted],
15189 group_to_nodes, group_to_instances,
15190 query.GQ_DISKPARAMS in self.requested_data)
15193 class LUGroupQuery(NoHooksLU):
15194 """Logical unit for querying node groups.
15199 def CheckArguments(self):
15200 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15201 self.op.output_fields, False)
15203 def ExpandNames(self):
15204 self.gq.ExpandNames(self)
15206 def DeclareLocks(self, level):
15207 self.gq.DeclareLocks(self, level)
15209 def Exec(self, feedback_fn):
15210 return self.gq.OldStyleQuery(self)
15213 class LUGroupSetParams(LogicalUnit):
15214 """Modifies the parameters of a node group.
15217 HPATH = "group-modify"
15218 HTYPE = constants.HTYPE_GROUP
15221 def CheckArguments(self):
15224 self.op.diskparams,
15225 self.op.alloc_policy,
15227 self.op.disk_state,
15231 if all_changes.count(None) == len(all_changes):
15232 raise errors.OpPrereqError("Please pass at least one modification",
15233 errors.ECODE_INVAL)
15235 def ExpandNames(self):
15236 # This raises errors.OpPrereqError on its own:
15237 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15239 self.needed_locks = {
15240 locking.LEVEL_INSTANCE: [],
15241 locking.LEVEL_NODEGROUP: [self.group_uuid],
15244 self.share_locks[locking.LEVEL_INSTANCE] = 1
15246 def DeclareLocks(self, level):
15247 if level == locking.LEVEL_INSTANCE:
15248 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15250 # Lock instances optimistically, needs verification once group lock has
15252 self.needed_locks[locking.LEVEL_INSTANCE] = \
15253 self.cfg.GetNodeGroupInstances(self.group_uuid)
15256 def _UpdateAndVerifyDiskParams(old, new):
15257 """Updates and verifies disk parameters.
15260 new_params = _GetUpdatedParams(old, new)
15261 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
return new_params
15264 def CheckPrereq(self):
15265 """Check prerequisites.
15268 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15270 # Check if locked instances are still correct
15271 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15273 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15274 cluster = self.cfg.GetClusterInfo()
15276 if self.group is None:
15277 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15278 (self.op.group_name, self.group_uuid))
15280 if self.op.ndparams:
15281 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15282 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15283 self.new_ndparams = new_ndparams
15285 if self.op.diskparams:
15286 diskparams = self.group.diskparams
15287 uavdp = self._UpdateAndVerifyDiskParams
15288 # For each disk template subdict, update and verify the values
15289 new_diskparams = dict((dt,
15290 uavdp(diskparams.get(dt, {}),
15291 self.op.diskparams[dt]))
15292 for dt in constants.DISK_TEMPLATES
15293 if dt in self.op.diskparams)
15294 # Now that we have all subdicts of diskparams ready, let's merge the actual
15295 # dict with all updated subdicts
15296 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
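# Illustrative example (added; parameter values are hypothetical): if the
# group currently has diskparams {"drbd": {"metavg": "xenvg"}} and the opcode
# passes {"drbd": {"resync-rate": 61440}}, new_diskparams contains the merged
# and type-checked "drbd" subdict and FillDict() above lays it over the
# group's existing per-template dicts.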
15298 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15299 except errors.OpPrereqError, err:
15300 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15301 errors.ECODE_INVAL)
15303 if self.op.hv_state:
15304 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15305 self.group.hv_state_static)
15307 if self.op.disk_state:
15308 self.new_disk_state = \
15309 _MergeAndVerifyDiskState(self.op.disk_state,
15310 self.group.disk_state_static)
15312 if self.op.ipolicy:
15313 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15317 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15318 inst_filter = lambda inst: inst.name in owned_instances
15319 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15320 gmi = ganeti.masterd.instance
15322 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15324 new_ipolicy, instances)
15327 self.LogWarning("After the ipolicy change the following instances"
15328 " violate it: %s",
15329 utils.CommaJoin(violations))
15331 def BuildHooksEnv(self):
15332 """Build hooks env.
15336 "GROUP_NAME": self.op.group_name,
15337 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15340 def BuildHooksNodes(self):
15341 """Build hooks nodes.
15344 mn = self.cfg.GetMasterNode()
15345 return ([mn], [mn])
15347 def Exec(self, feedback_fn):
15348 """Modifies the node group.
15353 if self.op.ndparams:
15354 self.group.ndparams = self.new_ndparams
15355 result.append(("ndparams", str(self.group.ndparams)))
15357 if self.op.diskparams:
15358 self.group.diskparams = self.new_diskparams
15359 result.append(("diskparams", str(self.group.diskparams)))
15361 if self.op.alloc_policy:
15362 self.group.alloc_policy = self.op.alloc_policy
15364 if self.op.hv_state:
15365 self.group.hv_state_static = self.new_hv_state
15367 if self.op.disk_state:
15368 self.group.disk_state_static = self.new_disk_state
15370 if self.op.ipolicy:
15371 self.group.ipolicy = self.new_ipolicy
15373 self.cfg.Update(self.group, feedback_fn)
15377 class LUGroupRemove(LogicalUnit):
15378 HPATH = "group-remove"
15379 HTYPE = constants.HTYPE_GROUP
15382 def ExpandNames(self):
15383 # This will raise errors.OpPrereqError on its own:
15384 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15385 self.needed_locks = {
15386 locking.LEVEL_NODEGROUP: [self.group_uuid],
15389 def CheckPrereq(self):
15390 """Check prerequisites.
15392 This checks that the given group name exists as a node group, that it is
15393 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
15397 # Verify that the group is empty.
15398 group_nodes = [node.name
15399 for node in self.cfg.GetAllNodesInfo().values()
15400 if node.group == self.group_uuid]
15403 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15405 (self.op.group_name,
15406 utils.CommaJoin(utils.NiceSort(group_nodes))),
15407 errors.ECODE_STATE)
15409 # Verify the cluster would not be left group-less.
15410 if len(self.cfg.GetNodeGroupList()) == 1:
15411 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15412 " removed" % self.op.group_name,
15413 errors.ECODE_STATE)
15415 def BuildHooksEnv(self):
15416 """Build hooks env.
15420 "GROUP_NAME": self.op.group_name,
15423 def BuildHooksNodes(self):
15424 """Build hooks nodes.
15427 mn = self.cfg.GetMasterNode()
15428 return ([mn], [mn])
15430 def Exec(self, feedback_fn):
15431 """Remove the node group.
15435 self.cfg.RemoveNodeGroup(self.group_uuid)
15436 except errors.ConfigurationError:
15437 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15438 (self.op.group_name, self.group_uuid))
15440 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15443 class LUGroupRename(LogicalUnit):
15444 HPATH = "group-rename"
15445 HTYPE = constants.HTYPE_GROUP
15448 def ExpandNames(self):
15449 # This raises errors.OpPrereqError on its own:
15450 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15452 self.needed_locks = {
15453 locking.LEVEL_NODEGROUP: [self.group_uuid],
15456 def CheckPrereq(self):
15457 """Check prerequisites.
15459 Ensures requested new name is not yet used.
15463 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15464 except errors.OpPrereqError:
15467 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15468 " node group (UUID: %s)" %
15469 (self.op.new_name, new_name_uuid),
15470 errors.ECODE_EXISTS)
15472 def BuildHooksEnv(self):
15473 """Build hooks env.
15477 "OLD_NAME": self.op.group_name,
15478 "NEW_NAME": self.op.new_name,
15481 def BuildHooksNodes(self):
15482 """Build hooks nodes.
15485 mn = self.cfg.GetMasterNode()
15487 all_nodes = self.cfg.GetAllNodesInfo()
15488 all_nodes.pop(mn, None)
15491 run_nodes.extend(node.name for node in all_nodes.values()
15492 if node.group == self.group_uuid)
15494 return (run_nodes, run_nodes)
15496 def Exec(self, feedback_fn):
15497 """Rename the node group.
15500 group = self.cfg.GetNodeGroup(self.group_uuid)
15503 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15504 (self.op.group_name, self.group_uuid))
15506 group.name = self.op.new_name
15507 self.cfg.Update(group, feedback_fn)
15509 return self.op.new_name
15512 class LUGroupEvacuate(LogicalUnit):
15513 HPATH = "group-evacuate"
15514 HTYPE = constants.HTYPE_GROUP
15517 def ExpandNames(self):
15518 # This raises errors.OpPrereqError on its own:
15519 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15521 if self.op.target_groups:
15522 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15523 self.op.target_groups)
15525 self.req_target_uuids = []
15527 if self.group_uuid in self.req_target_uuids:
15528 raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
15529 " as a target group (targets are %s)" %
15531 utils.CommaJoin(self.req_target_uuids)),
15532 errors.ECODE_INVAL)
15534 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15536 self.share_locks = _ShareAll()
15537 self.needed_locks = {
15538 locking.LEVEL_INSTANCE: [],
15539 locking.LEVEL_NODEGROUP: [],
15540 locking.LEVEL_NODE: [],
15543 def DeclareLocks(self, level):
15544 if level == locking.LEVEL_INSTANCE:
15545 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15547 # Lock instances optimistically, needs verification once node and group
15548 # locks have been acquired
15549 self.needed_locks[locking.LEVEL_INSTANCE] = \
15550 self.cfg.GetNodeGroupInstances(self.group_uuid)
15552 elif level == locking.LEVEL_NODEGROUP:
15553 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15555 if self.req_target_uuids:
15556 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15558 # Lock all groups used by instances optimistically; this requires going
15559 # via the node before it's locked, requiring verification later on
15560 lock_groups.update(group_uuid
15561 for instance_name in
15562 self.owned_locks(locking.LEVEL_INSTANCE)
15564 self.cfg.GetInstanceNodeGroups(instance_name))
15566 # No target groups, need to lock all of them
15567 lock_groups = locking.ALL_SET
15569 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15571 elif level == locking.LEVEL_NODE:
15572 # This will only lock the nodes in the group to be evacuated which
15573 # contain actual instances
15574 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15575 self._LockInstancesNodes()
15577 # Lock all nodes in group to be evacuated and target groups
15578 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15579 assert self.group_uuid in owned_groups
15580 member_nodes = [node_name
15581 for group in owned_groups
15582 for node_name in self.cfg.GetNodeGroup(group).members]
15583 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15585 def CheckPrereq(self):
15586 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15587 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15588 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15590 assert owned_groups.issuperset(self.req_target_uuids)
15591 assert self.group_uuid in owned_groups
15593 # Check if locked instances are still correct
15594 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15596 # Get instance information
15597 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15599 # Check if node groups for locked instances are still correct
15600 _CheckInstancesNodeGroups(self.cfg, self.instances,
15601 owned_groups, owned_nodes, self.group_uuid)
15603 if self.req_target_uuids:
15604 # User requested specific target groups
15605 self.target_uuids = self.req_target_uuids
15607 # All groups except the one to be evacuated are potential targets
15608 self.target_uuids = [group_uuid for group_uuid in owned_groups
15609 if group_uuid != self.group_uuid]
15611 if not self.target_uuids:
15612 raise errors.OpPrereqError("There are no possible target groups",
15613 errors.ECODE_INVAL)
15615 def BuildHooksEnv(self):
15616 """Build hooks env.
15620 "GROUP_NAME": self.op.group_name,
15621 "TARGET_GROUPS": " ".join(self.target_uuids),
15624 def BuildHooksNodes(self):
15625 """Build hooks nodes.
15628 mn = self.cfg.GetMasterNode()
15630 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15632 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15634 return (run_nodes, run_nodes)
15636 def Exec(self, feedback_fn):
15637 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15639 assert self.group_uuid not in self.target_uuids
15641 req = iallocator.IAReqGroupChange(instances=instances,
15642 target_groups=self.target_uuids)
15643 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15645 ial.Run(self.op.iallocator)
15647 if not ial.success:
15648 raise errors.OpPrereqError("Can't compute group evacuation using"
15649 " iallocator '%s': %s" %
15650 (self.op.iallocator, ial.info),
15651 errors.ECODE_NORES)
15653 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15655 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15656 len(jobs), self.op.group_name)
15658 return ResultWithJobs(jobs)
15661 class TagsLU(NoHooksLU): # pylint: disable=W0223
15662 """Generic tags LU.
15664 This is an abstract class which is the parent of all the other tags LUs.
15667 def ExpandNames(self):
15668 self.group_uuid = None
15669 self.needed_locks = {}
15671 if self.op.kind == constants.TAG_NODE:
15672 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15673 lock_level = locking.LEVEL_NODE
15674 lock_name = self.op.name
15675 elif self.op.kind == constants.TAG_INSTANCE:
15676 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15677 lock_level = locking.LEVEL_INSTANCE
15678 lock_name = self.op.name
15679 elif self.op.kind == constants.TAG_NODEGROUP:
15680 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15681 lock_level = locking.LEVEL_NODEGROUP
15682 lock_name = self.group_uuid
15683 elif self.op.kind == constants.TAG_NETWORK:
15684 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15685 lock_level = locking.LEVEL_NETWORK
15686 lock_name = self.network_uuid
15691 if lock_level and getattr(self.op, "use_locking", True):
15692 self.needed_locks[lock_level] = lock_name
15694 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15695 # not possible to acquire the BGL based on opcode parameters)
15697 def CheckPrereq(self):
15698 """Check prerequisites.
15701 if self.op.kind == constants.TAG_CLUSTER:
15702 self.target = self.cfg.GetClusterInfo()
15703 elif self.op.kind == constants.TAG_NODE:
15704 self.target = self.cfg.GetNodeInfo(self.op.name)
15705 elif self.op.kind == constants.TAG_INSTANCE:
15706 self.target = self.cfg.GetInstanceInfo(self.op.name)
15707 elif self.op.kind == constants.TAG_NODEGROUP:
15708 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15709 elif self.op.kind == constants.TAG_NETWORK:
15710 self.target = self.cfg.GetNetwork(self.network_uuid)
15712 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15713 str(self.op.kind), errors.ECODE_INVAL)
15716 class LUTagsGet(TagsLU):
15717 """Returns the tags of a given object.
15722 def ExpandNames(self):
15723 TagsLU.ExpandNames(self)
15725 # Share locks as this is only a read operation
15726 self.share_locks = _ShareAll()
15728 def Exec(self, feedback_fn):
15729 """Returns the tag list.
15732 return list(self.target.GetTags())
15735 class LUTagsSearch(NoHooksLU):
15736 """Searches the tags for a given pattern.
15741 def ExpandNames(self):
15742 self.needed_locks = {}
15744 def CheckPrereq(self):
15745 """Check prerequisites.
15747 This checks the pattern passed for validity by compiling it.
15751 self.re = re.compile(self.op.pattern)
15752 except re.error, err:
15753 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15754 (self.op.pattern, err), errors.ECODE_INVAL)
15756 def Exec(self, feedback_fn):
15757 """Returns the list of (path, tag) pairs matching the pattern.
15761 tgts = [("/cluster", cfg.GetClusterInfo())]
15762 ilist = cfg.GetAllInstancesInfo().values()
15763 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15764 nlist = cfg.GetAllNodesInfo().values()
15765 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15766 tgts.extend(("/nodegroup/%s" % n.name, n)
15767 for n in cfg.GetAllNodeGroupsInfo().values())
15769 for path, target in tgts:
15770 for tag in target.GetTags():
15771 if self.re.search(tag):
15772 results.append((path, tag))
15776 class LUTagsSet(TagsLU):
15777 """Sets a tag on a given object.
15782 def CheckPrereq(self):
15783 """Check prerequisites.
15785 This checks the type and length of the tag name and value.
15788 TagsLU.CheckPrereq(self)
15789 for tag in self.op.tags:
15790 objects.TaggableObject.ValidateTag(tag)
15792 def Exec(self, feedback_fn):
15797 for tag in self.op.tags:
15798 self.target.AddTag(tag)
15799 except errors.TagError, err:
15800 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15801 self.cfg.Update(self.target, feedback_fn)
15804 class LUTagsDel(TagsLU):
15805 """Delete a list of tags from a given object.
15810 def CheckPrereq(self):
15811 """Check prerequisites.
15813 This checks that we have the given tag.
15816 TagsLU.CheckPrereq(self)
15817 for tag in self.op.tags:
15818 objects.TaggableObject.ValidateTag(tag)
15819 del_tags = frozenset(self.op.tags)
15820 cur_tags = self.target.GetTags()
15822 diff_tags = del_tags - cur_tags
15824 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15825 raise errors.OpPrereqError("Tag(s) %s not found" %
15826 (utils.CommaJoin(diff_names), ),
15827 errors.ECODE_NOENT)
15829 def Exec(self, feedback_fn):
15830 """Remove the tag from the object.
15833 for tag in self.op.tags:
15834 self.target.RemoveTag(tag)
15835 self.cfg.Update(self.target, feedback_fn)
15838 class LUTestDelay(NoHooksLU):
15839 """Sleep for a specified amount of time.
15841 This LU sleeps on the master and/or nodes for a specified amount of
15847 def ExpandNames(self):
15848 """Expand names and set required locks.
15850 This expands the node list, if any.
15853 self.needed_locks = {}
15854 if self.op.on_nodes:
15855 # _GetWantedNodes can be used here, but is not always appropriate to use
15856 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15857 # more information.
15858 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15859 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15861 def _TestDelay(self):
15862 """Do the actual sleep.
15865 if self.op.on_master:
15866 if not utils.TestDelay(self.op.duration):
15867 raise errors.OpExecError("Error during master delay test")
15868 if self.op.on_nodes:
15869 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15870 for node, node_result in result.items():
15871 node_result.Raise("Failure during rpc call to node %s" % node)
15873 def Exec(self, feedback_fn):
15874 """Execute the test delay opcode, with the wanted repetitions.
15877 if self.op.repeat == 0:
15880 top_value = self.op.repeat - 1
15881 for i in range(self.op.repeat):
15882 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15886 class LURestrictedCommand(NoHooksLU):
15887 """Logical unit for executing restricted commands.
15892 def ExpandNames(self):
15894 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15896 self.needed_locks = {
15897 locking.LEVEL_NODE: self.op.nodes,
15899 self.share_locks = {
15900 locking.LEVEL_NODE: not self.op.use_locking,
15903 def CheckPrereq(self):
15904 """Check prerequisites.
15908 def Exec(self, feedback_fn):
15909 """Execute restricted command and return output.
15912 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15914 # Check if correct locks are held
15915 assert set(self.op.nodes).issubset(owned_nodes)
15917 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15921 for node_name in self.op.nodes:
15922 nres = rpcres[node_name]
15924 msg = ("Command '%s' on node '%s' failed: %s" %
15925 (self.op.command, node_name, nres.fail_msg))
15926 result.append((False, msg))
15928 result.append((True, nres.payload))
15933 class LUTestJqueue(NoHooksLU):
15934 """Utility LU to test some aspects of the job queue.
15939 # Must be lower than default timeout for WaitForJobChange to see whether it
15940 # notices changed jobs
15941 _CLIENT_CONNECT_TIMEOUT = 20.0
15942 _CLIENT_CONFIRM_TIMEOUT = 60.0
15945 def _NotifyUsingSocket(cls, cb, errcls):
15946 """Opens a Unix socket and waits for another program to connect.
15949 @param cb: Callback to send socket name to client
15950 @type errcls: class
15951 @param errcls: Exception class to use for errors
15954 # Using a temporary directory as there's no easy way to create temporary
15955 # sockets without writing a custom loop around tempfile.mktemp and
15957 tmpdir = tempfile.mkdtemp()
15959 tmpsock = utils.PathJoin(tmpdir, "sock")
15961 logging.debug("Creating temporary socket at %s", tmpsock)
15962 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15967 # Send details to client
15970 # Wait for client to connect before continuing
15971 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15973 (conn, _) = sock.accept()
15974 except socket.error, err:
15975 raise errcls("Client didn't connect in time (%s)" % err)
15979 # Remove as soon as client is connected
15980 shutil.rmtree(tmpdir)
15982 # Wait for client to close
15985 # pylint: disable=E1101
15986 # Instance of '_socketobject' has no ... member
15987 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15989 except socket.error, err:
15990 raise errcls("Client failed to confirm notification (%s)" % err)
15994 def _SendNotification(self, test, arg, sockname):
15995 """Sends a notification to the client.
15998 @param test: Test name
15999 @param arg: Test argument (depends on test)
16000 @type sockname: string
16001 @param sockname: Socket path
16004 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16006 def _Notify(self, prereq, test, arg):
16007 """Notifies the client of a test.
16010 @param prereq: Whether this is a prereq-phase test
16012 @param test: Test name
16013 @param arg: Test argument (depends on test)
16017 errcls = errors.OpPrereqError
16019 errcls = errors.OpExecError
16021 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16025 def CheckArguments(self):
16026 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16027 self.expandnames_calls = 0
16029 def ExpandNames(self):
16030 checkargs_calls = getattr(self, "checkargs_calls", 0)
16031 if checkargs_calls < 1:
16032 raise errors.ProgrammerError("CheckArguments was not called")
16034 self.expandnames_calls += 1
16036 if self.op.notify_waitlock:
16037 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16039 self.LogInfo("Expanding names")
16041 # Get lock on master node (just to get a lock, not for a particular reason)
16042 self.needed_locks = {
16043 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16046 def Exec(self, feedback_fn):
16047 if self.expandnames_calls < 1:
16048 raise errors.ProgrammerError("ExpandNames was not called")
16050 if self.op.notify_exec:
16051 self._Notify(False, constants.JQT_EXEC, None)
16053 self.LogInfo("Executing")
16055 if self.op.log_messages:
16056 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16057 for idx, msg in enumerate(self.op.log_messages):
16058 self.LogInfo("Sending log message %s", idx + 1)
16059 feedback_fn(constants.JQT_MSGPREFIX + msg)
16060 # Report how many test messages have been sent
16061 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16064 raise errors.OpExecError("Opcode failure was requested")
16069 class LUTestAllocator(NoHooksLU):
16070 """Run allocator tests.
16072 This LU runs the allocator tests
16075 def CheckPrereq(self):
16076 """Check prerequisites.
16078 This checks the opcode parameters depending on the direction and mode of the test.
16081 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16082 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16083 for attr in ["memory", "disks", "disk_template",
16084 "os", "tags", "nics", "vcpus"]:
16085 if not hasattr(self.op, attr):
16086 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16087 attr, errors.ECODE_INVAL)
16088 iname = self.cfg.ExpandInstanceName(self.op.name)
16089 if iname is not None:
16090 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16091 iname, errors.ECODE_EXISTS)
16092 if not isinstance(self.op.nics, list):
16093 raise errors.OpPrereqError("Invalid parameter 'nics'",
16094 errors.ECODE_INVAL)
16095 if not isinstance(self.op.disks, list):
16096 raise errors.OpPrereqError("Invalid parameter 'disks'",
16097 errors.ECODE_INVAL)
16098 for row in self.op.disks:
16099 if (not isinstance(row, dict) or
16100 constants.IDISK_SIZE not in row or
16101 not isinstance(row[constants.IDISK_SIZE], int) or
16102 constants.IDISK_MODE not in row or
16103 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16104 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16105 " parameter", errors.ECODE_INVAL)
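# Illustrative example of a disk specification accepted by the check above
# (added; the values are hypothetical):
#   {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR}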
16106 if self.op.hypervisor is None:
16107 self.op.hypervisor = self.cfg.GetHypervisorType()
16108 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16109 fname = _ExpandInstanceName(self.cfg, self.op.name)
16110 self.op.name = fname
16111 self.relocate_from = \
16112 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16113 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16114 constants.IALLOCATOR_MODE_NODE_EVAC):
16115 if not self.op.instances:
16116 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16117 self.op.instances = _GetWantedInstances(self, self.op.instances)
16119 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16120 self.op.mode, errors.ECODE_INVAL)
16122 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16123 if self.op.iallocator is None:
16124 raise errors.OpPrereqError("Missing allocator name",
16125 errors.ECODE_INVAL)
16126 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16127 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16128 self.op.direction, errors.ECODE_INVAL)
16130 def Exec(self, feedback_fn):
16131 """Run the allocator test.
16134 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16135 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16136 memory=self.op.memory,
16137 disks=self.op.disks,
16138 disk_template=self.op.disk_template,
16142 vcpus=self.op.vcpus,
16143 spindle_use=self.op.spindle_use,
16144 hypervisor=self.op.hypervisor)
16145 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16146 req = iallocator.IAReqRelocate(name=self.op.name,
16147 relocate_from=list(self.relocate_from))
16148 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16149 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16150 target_groups=self.op.target_groups)
16151 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16152 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16153 evac_mode=self.op.evac_mode)
16154 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16155 disk_template = self.op.disk_template
16156 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16157 memory=self.op.memory,
16158 disks=self.op.disks,
16159 disk_template=disk_template,
16163 vcpus=self.op.vcpus,
16164 spindle_use=self.op.spindle_use,
16165 hypervisor=self.op.hypervisor)
16166 for idx in range(self.op.count)]
16167 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16169 raise errors.ProgrammerError("Unhandled mode %s in"
16170 " LUTestAllocator.Exec", self.op.mode)
16172 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16173 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16174 result = ial.in_text
16176 ial.Run(self.op.iallocator, validate=False)
16177 result = ial.out_text
16181 class LUNetworkAdd(LogicalUnit):
16182 """Logical unit for creating networks.
16185 HPATH = "network-add"
16186 HTYPE = constants.HTYPE_NETWORK
16189 def BuildHooksNodes(self):
16190 """Build hooks nodes.
16193 mn = self.cfg.GetMasterNode()
16194 return ([mn], [mn])
16196 def CheckArguments(self):
16197 if self.op.mac_prefix:
16198 self.op.mac_prefix = \
16199 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16201 def ExpandNames(self):
16202 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16204 if self.op.conflicts_check:
16205 self.share_locks[locking.LEVEL_NODE] = 1
16206 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16207 self.needed_locks = {
16208 locking.LEVEL_NODE: locking.ALL_SET,
16209 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16212 self.needed_locks = {}
16214 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16216 def CheckPrereq(self):
16217 if self.op.network is None:
16218 raise errors.OpPrereqError("Network must be given",
16219 errors.ECODE_INVAL)
16222 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16223 except errors.OpPrereqError:
16226 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16227 " network (UUID: %s)" %
16228 (self.op.network_name, existing_uuid),
16229 errors.ECODE_EXISTS)
16231 # Check tag validity
16232 for tag in self.op.tags:
16233 objects.TaggableObject.ValidateTag(tag)
16235 def BuildHooksEnv(self):
16236 """Build hooks env.
16240 "name": self.op.network_name,
16241 "subnet": self.op.network,
16242 "gateway": self.op.gateway,
16243 "network6": self.op.network6,
16244 "gateway6": self.op.gateway6,
16245 "mac_prefix": self.op.mac_prefix,
16246 "tags": self.op.tags,
16248 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16250 def Exec(self, feedback_fn):
16251 """Add the ip pool to the cluster.
16254 nobj = objects.Network(name=self.op.network_name,
16255 network=self.op.network,
16256 gateway=self.op.gateway,
16257 network6=self.op.network6,
16258 gateway6=self.op.gateway6,
16259 mac_prefix=self.op.mac_prefix,
16260 uuid=self.network_uuid)
16261 # Initialize the associated address pool
16263 pool = network.AddressPool.InitializeNetwork(nobj)
16264 except errors.AddressPoolError, err:
16265 raise errors.OpExecError("Cannot create IP address pool for network"
16266 " '%s': %s" % (self.op.network_name, err))
16268 # Check if we need to reserve the nodes and the cluster master IP
16269 # These may not be allocated to any instances in routed mode, as
16270 # they wouldn't function anyway.
16271 if self.op.conflicts_check:
16272 for node in self.cfg.GetAllNodesInfo().values():
16273 for ip in [node.primary_ip, node.secondary_ip]:
16275 if pool.Contains(ip):
16277 self.LogInfo("Reserved IP address of node '%s' (%s)",
16279 except errors.AddressPoolError, err:
16280 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16281 ip, node.name, err)
16283 master_ip = self.cfg.GetClusterInfo().master_ip
16285 if pool.Contains(master_ip):
16286 pool.Reserve(master_ip)
16287 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16288 except errors.AddressPoolError, err:
16289 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16292 if self.op.add_reserved_ips:
16293 for ip in self.op.add_reserved_ips:
16295 pool.Reserve(ip, external=True)
16296 except errors.AddressPoolError, err:
16297 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16301 for tag in self.op.tags:
16304 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16305 del self.remove_locks[locking.LEVEL_NETWORK]
16308 class LUNetworkRemove(LogicalUnit):
16309 HPATH = "network-remove"
16310 HTYPE = constants.HTYPE_NETWORK
16313 def ExpandNames(self):
16314 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16316 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16317 self.needed_locks = {
16318 locking.LEVEL_NETWORK: [self.network_uuid],
16319 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16322 def CheckPrereq(self):
16323 """Check prerequisites.
16325 This checks that the given network name exists as a network and that it
16326 is not connected to any node group.
16330 # Verify that the network is not connected to any node group.
16331 node_groups = [group.name
16332 for group in self.cfg.GetAllNodeGroupsInfo().values()
16333 if self.network_uuid in group.networks]
16336 self.LogWarning("Network '%s' is connected to the following"
16337 " node groups: %s" %
16338 (self.op.network_name,
16339 utils.CommaJoin(utils.NiceSort(node_groups))))
16340 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16342 def BuildHooksEnv(self):
16343 """Build hooks env.
16347 "NETWORK_NAME": self.op.network_name,
16350 def BuildHooksNodes(self):
16351 """Build hooks nodes.
16354 mn = self.cfg.GetMasterNode()
16355 return ([mn], [mn])
16357 def Exec(self, feedback_fn):
16358 """Remove the network.
16362 self.cfg.RemoveNetwork(self.network_uuid)
16363 except errors.ConfigurationError:
16364 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16365 (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
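    # The external=True reservations below mark addresses as administratively
    # reserved, which the address pool appears to track separately from
    # addresses handed out to instance NICs (see network.AddressPool).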
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6
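    # Sanity-check the resulting pool before the modified network object is
    # written back to the configuration (assumption: Validate() complains if
    # the reservation maps no longer match the network definition).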
    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
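    # Requested names are translated to UUIDs here, since networks are stored
    # and locked by UUID while users refer to them by name.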
    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET
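    # Instance and node group locks are only needed if the corresponding usage
    # information (NETQ_INST/NETQ_GROUP fields) was actually requested.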
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network in network_uuids:
            network_to_instances[nic.network].append(instance.name)
            break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
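    # Summarize the pool for the stats fields: free/reserved address counts,
    # a textual map of the pool (GetMap() is assumed to mark reserved vs. free
    # addresses, e.g. as "X"/"." characters), and the externally reserved IPs.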
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
      return

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning a boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)
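  # (Instance locks were acquired optimistically in DeclareLocks; the call
  # above re-checks the group's instance list now that the group lock is held
  # and fails if the set of instances changed in the meantime.)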
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
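  # Illustrative example: [(0, "192.0.2.10"), (2, "192.0.2.12")] is rendered
  # as "nic0/192.0.2.10, nic2/192.0.2.12".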
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
                          "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }
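# The assertion below keeps this mapping in sync with the constants: every
# resource that is queryable through an opcode (constants.QR_VIA_OP) must have
# an implementation here, and nothing else may be listed.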

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)


def _CheckForConflictingIp(lu, ip, node):
  """Raise an error in case of a conflicting IP address.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
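  # CheckIPInNodeGroup returns a pair whose first element identifies the
  # network already using this IP within the node's group (None when the
  # address is not in use); the second element is not needed here.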
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)