4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensure
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separate is better because:
181 - ExpandNames is left as as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods can no longer worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. No nodes should be returned as an
323 empty list (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused argument and could
350 # be a function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 If should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we're really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @returns The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @param use_default: boolean
794 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @param use_none: boolean
797 @type use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of a instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
831 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
833 use_default=use_default)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster'" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckGlobalHvParams(params):
1024 """Validates that given hypervisor params are not global ones.
1026 This will ensure that instances don't get customised versions of
1030 used_globals = constants.HVC_GLOBALS.intersection(params)
1032 msg = ("The following hypervisor parameters are global and cannot"
1033 " be customized at instance level, please modify them at"
1034 " cluster level: %s" % utils.CommaJoin(used_globals))
1035 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1038 def _CheckNodeOnline(lu, node, msg=None):
1039 """Ensure that a given node is online.
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @param msg: if passed, should be a message to replace the default one
1044 @raise errors.OpPrereqError: if the node is offline
1048 msg = "Can't use offline node"
1049 if lu.cfg.GetNodeInfo(node).offline:
1050 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1053 def _CheckNodeNotDrained(lu, node):
1054 """Ensure that a given node is not drained.
1056 @param lu: the LU on behalf of which we make the check
1057 @param node: the node to check
1058 @raise errors.OpPrereqError: if the node is drained
1061 if lu.cfg.GetNodeInfo(node).drained:
1062 raise errors.OpPrereqError("Can't use drained node %s" % node,
1066 def _CheckNodeVmCapable(lu, node):
1067 """Ensure that a given node is vm capable.
1069 @param lu: the LU on behalf of which we make the check
1070 @param node: the node to check
1071 @raise errors.OpPrereqError: if the node is not vm capable
1074 if not lu.cfg.GetNodeInfo(node).vm_capable:
1075 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1079 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1080 """Ensure that a node supports a given OS.
1082 @param lu: the LU on behalf of which we make the check
1083 @param node: the node to check
1084 @param os_name: the OS to query about
1085 @param force_variant: whether to ignore variant errors
1086 @raise errors.OpPrereqError: if the node is not supporting the OS
1089 result = lu.rpc.call_os_get(node, os_name)
1090 result.Raise("OS '%s' not in supported OS list for node %s" %
1092 prereq=True, ecode=errors.ECODE_INVAL)
1093 if not force_variant:
1094 _CheckOSVariant(result.payload, os_name)
1097 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1098 """Ensure that a node has the given secondary ip.
1100 @type lu: L{LogicalUnit}
1101 @param lu: the LU on behalf of which we make the check
1103 @param node: the node to check
1104 @type secondary_ip: string
1105 @param secondary_ip: the ip to check
1106 @type prereq: boolean
1107 @param prereq: whether to throw a prerequisite or an execute error
1108 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1109 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1112 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1113 result.Raise("Failure checking secondary ip on node %s" % node,
1114 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1115 if not result.payload:
1116 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1117 " please fix and re-run this command" % secondary_ip)
1119 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1121 raise errors.OpExecError(msg)
1124 def _CheckNodePVs(nresult, exclusive_storage):
1128 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1129 if pvlist_dict is None:
1130 return (["Can't get PV list from node"], None)
1131 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1133 # check that ':' is not present in PV names, since it's a
1134 # special character for lvcreate (denotes the range of PEs to
1138 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1139 (pv.name, pv.vg_name))
1141 if exclusive_storage:
1142 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1143 errlist.extend(errmsgs)
1144 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1146 for (pvname, lvlist) in shared_pvs:
1147 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1148 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1149 (pvname, utils.CommaJoin(lvlist)))
1150 return (errlist, es_pvinfo)
1153 def _GetClusterDomainSecret():
1154 """Reads the cluster domain secret.
1157 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1161 def _CheckInstanceState(lu, instance, req_states, msg=None):
1162 """Ensure that an instance is in one of the required states.
1164 @param lu: the LU on behalf of which we make the check
1165 @param instance: the instance to check
1166 @param msg: if passed, should be a message to replace the default one
1167 @raise errors.OpPrereqError: if the instance is not in the required state
1171 msg = ("can't use instance from outside %s states" %
1172 utils.CommaJoin(req_states))
1173 if instance.admin_state not in req_states:
1174 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1175 (instance.name, instance.admin_state, msg),
1178 if constants.ADMINST_UP not in req_states:
1179 pnode = instance.primary_node
1180 if not lu.cfg.GetNodeInfo(pnode).offline:
1181 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1182 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1183 prereq=True, ecode=errors.ECODE_ENVIRON)
1184 if instance.name in ins_l.payload:
1185 raise errors.OpPrereqError("Instance %s is running, %s" %
1186 (instance.name, msg), errors.ECODE_STATE)
1188 lu.LogWarning("Primary node offline, ignoring check that instance"
1192 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1193 """Computes if value is in the desired range.
1195 @param name: name of the parameter for which we perform the check
1196 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1198 @param ipolicy: dictionary containing min, max and std values
1199 @param value: actual value that we want to use
1200 @return: None or element not meeting the criteria
1204 if value in [None, constants.VALUE_AUTO]:
1206 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1207 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1208 if value > max_v or min_v > value:
1210 fqn = "%s/%s" % (name, qualifier)
1213 return ("%s value %s is not in range [%s, %s]" %
1214 (fqn, value, min_v, max_v))
1218 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1219 nic_count, disk_sizes, spindle_use,
1220 _compute_fn=_ComputeMinMaxSpec):
1221 """Verifies ipolicy against provided specs.
1224 @param ipolicy: The ipolicy
1226 @param mem_size: The memory size
1227 @type cpu_count: int
1228 @param cpu_count: Used cpu cores
1229 @type disk_count: int
1230 @param disk_count: Number of disks used
1231 @type nic_count: int
1232 @param nic_count: Number of nics used
1233 @type disk_sizes: list of ints
1234 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1235 @type spindle_use: int
1236 @param spindle_use: The number of spindles this instance uses
1237 @param _compute_fn: The compute function (unittest only)
1238 @return: A list of violations, or an empty list of no violations are found
1241 assert disk_count == len(disk_sizes)
1244 (constants.ISPEC_MEM_SIZE, "", mem_size),
1245 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1246 (constants.ISPEC_DISK_COUNT, "", disk_count),
1247 (constants.ISPEC_NIC_COUNT, "", nic_count),
1248 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1249 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1250 for idx, d in enumerate(disk_sizes)]
1253 (_compute_fn(name, qualifier, ipolicy, value)
1254 for (name, qualifier, value) in test_settings))
1257 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1258 _compute_fn=_ComputeIPolicySpecViolation):
1259 """Compute if instance meets the specs of ipolicy.
1262 @param ipolicy: The ipolicy to verify against
1263 @type instance: L{objects.Instance}
1264 @param instance: The instance to verify
1265 @param _compute_fn: The function to verify ipolicy (unittest only)
1266 @see: L{_ComputeIPolicySpecViolation}
1269 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1270 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1271 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1272 disk_count = len(instance.disks)
1273 disk_sizes = [disk.size for disk in instance.disks]
1274 nic_count = len(instance.nics)
1276 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1277 disk_sizes, spindle_use)
1280 def _ComputeIPolicyInstanceSpecViolation(
1281 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1282 """Compute if instance specs meets the specs of ipolicy.
1285 @param ipolicy: The ipolicy to verify against
1286 @param instance_spec: dict
1287 @param instance_spec: The instance spec to verify
1288 @param _compute_fn: The function to verify ipolicy (unittest only)
1289 @see: L{_ComputeIPolicySpecViolation}
1292 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1293 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1294 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1295 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1296 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1297 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1299 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1300 disk_sizes, spindle_use)
1303 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1305 _compute_fn=_ComputeIPolicyInstanceViolation):
1306 """Compute if instance meets the specs of the new target group.
1308 @param ipolicy: The ipolicy to verify
1309 @param instance: The instance object to verify
1310 @param current_group: The current group of the instance
1311 @param target_group: The new group of the instance
1312 @param _compute_fn: The function to verify ipolicy (unittest only)
1313 @see: L{_ComputeIPolicySpecViolation}
1316 if current_group == target_group:
1319 return _compute_fn(ipolicy, instance)
1322 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1323 _compute_fn=_ComputeIPolicyNodeViolation):
1324 """Checks that the target node is correct in terms of instance policy.
1326 @param ipolicy: The ipolicy to verify
1327 @param instance: The instance object to verify
1328 @param node: The new node to relocate
1329 @param ignore: Ignore violations of the ipolicy
1330 @param _compute_fn: The function to verify ipolicy (unittest only)
1331 @see: L{_ComputeIPolicySpecViolation}
1334 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1335 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1338 msg = ("Instance does not meet target node group's (%s) instance"
1339 " policy: %s") % (node.group, utils.CommaJoin(res))
1343 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1346 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1347 """Computes a set of any instances that would violate the new ipolicy.
1349 @param old_ipolicy: The current (still in-place) ipolicy
1350 @param new_ipolicy: The new (to become) ipolicy
1351 @param instances: List of instances to verify
1352 @return: A list of instances which violates the new ipolicy but
1356 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1357 _ComputeViolatingInstances(old_ipolicy, instances))
1360 def _ExpandItemName(fn, name, kind):
1361 """Expand an item name.
1363 @param fn: the function to use for expansion
1364 @param name: requested item name
1365 @param kind: text description ('Node' or 'Instance')
1366 @return: the resolved (full) name
1367 @raise errors.OpPrereqError: if the item is not found
1370 full_name = fn(name)
1371 if full_name is None:
1372 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1377 def _ExpandNodeName(cfg, name):
1378 """Wrapper over L{_ExpandItemName} for nodes."""
1379 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1382 def _ExpandInstanceName(cfg, name):
1383 """Wrapper over L{_ExpandItemName} for instance."""
1384 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1387 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1388 network_type, mac_prefix, tags):
1389 """Builds network related env variables for hooks
1391 This builds the hook environment from individual variables.
1394 @param name: the name of the network
1395 @type subnet: string
1396 @param subnet: the ipv4 subnet
1397 @type gateway: string
1398 @param gateway: the ipv4 gateway
1399 @type network6: string
1400 @param network6: the ipv6 subnet
1401 @type gateway6: string
1402 @param gateway6: the ipv6 gateway
1403 @type network_type: string
1404 @param network_type: the type of the network
1405 @type mac_prefix: string
1406 @param mac_prefix: the mac_prefix
1408 @param tags: the tags of the network
1413 env["NETWORK_NAME"] = name
1415 env["NETWORK_SUBNET"] = subnet
1417 env["NETWORK_GATEWAY"] = gateway
1419 env["NETWORK_SUBNET6"] = network6
1421 env["NETWORK_GATEWAY6"] = gateway6
1423 env["NETWORK_MAC_PREFIX"] = mac_prefix
1425 env["NETWORK_TYPE"] = network_type
1427 env["NETWORK_TAGS"] = " ".join(tags)
1432 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1433 minmem, maxmem, vcpus, nics, disk_template, disks,
1434 bep, hvp, hypervisor_name, tags):
1435 """Builds instance related env variables for hooks
1437 This builds the hook environment from individual variables.
1440 @param name: the name of the instance
1441 @type primary_node: string
1442 @param primary_node: the name of the instance's primary node
1443 @type secondary_nodes: list
1444 @param secondary_nodes: list of secondary nodes as strings
1445 @type os_type: string
1446 @param os_type: the name of the instance's OS
1447 @type status: string
1448 @param status: the desired status of the instance
1449 @type minmem: string
1450 @param minmem: the minimum memory size of the instance
1451 @type maxmem: string
1452 @param maxmem: the maximum memory size of the instance
1454 @param vcpus: the count of VCPUs the instance has
1456 @param nics: list of tuples (ip, mac, mode, link, network) representing
1457 the NICs the instance has
1458 @type disk_template: string
1459 @param disk_template: the disk template of the instance
1461 @param disks: the list of (size, mode) pairs
1463 @param bep: the backend parameters for the instance
1465 @param hvp: the hypervisor parameters for the instance
1466 @type hypervisor_name: string
1467 @param hypervisor_name: the hypervisor for the instance
1469 @param tags: list of instance tags as strings
1471 @return: the hook environment for this instance
1476 "INSTANCE_NAME": name,
1477 "INSTANCE_PRIMARY": primary_node,
1478 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1479 "INSTANCE_OS_TYPE": os_type,
1480 "INSTANCE_STATUS": status,
1481 "INSTANCE_MINMEM": minmem,
1482 "INSTANCE_MAXMEM": maxmem,
1483 # TODO(2.7) remove deprecated "memory" value
1484 "INSTANCE_MEMORY": maxmem,
1485 "INSTANCE_VCPUS": vcpus,
1486 "INSTANCE_DISK_TEMPLATE": disk_template,
1487 "INSTANCE_HYPERVISOR": hypervisor_name,
1490 nic_count = len(nics)
1491 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1494 env["INSTANCE_NIC%d_IP" % idx] = ip
1495 env["INSTANCE_NIC%d_MAC" % idx] = mac
1496 env["INSTANCE_NIC%d_MODE" % idx] = mode
1497 env["INSTANCE_NIC%d_LINK" % idx] = link
1499 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1501 nobj = objects.Network.FromDict(netinfo)
1503 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1505 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1507 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1509 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1511 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1512 if nobj.network_type:
1513 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1515 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1516 if mode == constants.NIC_MODE_BRIDGED:
1517 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1521 env["INSTANCE_NIC_COUNT"] = nic_count
1524 disk_count = len(disks)
1525 for idx, (size, mode) in enumerate(disks):
1526 env["INSTANCE_DISK%d_SIZE" % idx] = size
1527 env["INSTANCE_DISK%d_MODE" % idx] = mode
1531 env["INSTANCE_DISK_COUNT"] = disk_count
1536 env["INSTANCE_TAGS"] = " ".join(tags)
1538 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1539 for key, value in source.items():
1540 env["INSTANCE_%s_%s" % (kind, key)] = value
1545 def _NICToTuple(lu, nic):
1546 """Build a tupple of nic information.
1548 @type lu: L{LogicalUnit}
1549 @param lu: the logical unit on whose behalf we execute
1550 @type nic: L{objects.NIC}
1551 @param nic: nic to convert to hooks tuple
1554 cluster = lu.cfg.GetClusterInfo()
1555 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1556 mode = filled_params[constants.NIC_MODE]
1557 link = filled_params[constants.NIC_LINK]
1560 net_uuid = lu.cfg.LookupNetwork(nic.network)
1561 netinfo = objects.Network.ToDict(lu.cfg.GetNetwork(net_uuid))
1563 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1566 def _NICListToTuple(lu, nics):
1567 """Build a list of nic information tuples.
1569 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1570 value in LUInstanceQueryData.
1572 @type lu: L{LogicalUnit}
1573 @param lu: the logical unit on whose behalf we execute
1574 @type nics: list of L{objects.NIC}
1575 @param nics: list of nics to convert to hooks tuples
1580 hooks_nics.append(_NICToTuple(lu, nic))
1584 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1585 """Builds instance related env variables for hooks from an object.
1587 @type lu: L{LogicalUnit}
1588 @param lu: the logical unit on whose behalf we execute
1589 @type instance: L{objects.Instance}
1590 @param instance: the instance for which we should build the
1592 @type override: dict
1593 @param override: dictionary with key/values that will override
1596 @return: the hook environment dictionary
1599 cluster = lu.cfg.GetClusterInfo()
1600 bep = cluster.FillBE(instance)
1601 hvp = cluster.FillHV(instance)
1603 "name": instance.name,
1604 "primary_node": instance.primary_node,
1605 "secondary_nodes": instance.secondary_nodes,
1606 "os_type": instance.os,
1607 "status": instance.admin_state,
1608 "maxmem": bep[constants.BE_MAXMEM],
1609 "minmem": bep[constants.BE_MINMEM],
1610 "vcpus": bep[constants.BE_VCPUS],
1611 "nics": _NICListToTuple(lu, instance.nics),
1612 "disk_template": instance.disk_template,
1613 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1616 "hypervisor_name": instance.hypervisor,
1617 "tags": instance.tags,
1620 args.update(override)
1621 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1624 def _AdjustCandidatePool(lu, exceptions):
1625 """Adjust the candidate pool after node operations.
1628 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1630 lu.LogInfo("Promoted nodes to master candidate role: %s",
1631 utils.CommaJoin(node.name for node in mod_list))
1632 for name in mod_list:
1633 lu.context.ReaddNode(name)
1634 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1636 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1640 def _DecideSelfPromotion(lu, exceptions=None):
1641 """Decide whether I should promote myself as a master candidate.
1644 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1645 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1646 # the new node will increase mc_max with one, so:
1647 mc_should = min(mc_should + 1, cp_size)
1648 return mc_now < mc_should
1651 def _ComputeViolatingInstances(ipolicy, instances):
1652 """Computes a set of instances who violates given ipolicy.
1654 @param ipolicy: The ipolicy to verify
1655 @type instances: object.Instance
1656 @param instances: List of instances to verify
1657 @return: A frozenset of instance names violating the ipolicy
1660 return frozenset([inst.name for inst in instances
1661 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1664 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1665 """Check that the brigdes needed by a list of nics exist.
1668 cluster = lu.cfg.GetClusterInfo()
1669 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1670 brlist = [params[constants.NIC_LINK] for params in paramslist
1671 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1673 result = lu.rpc.call_bridges_exist(target_node, brlist)
1674 result.Raise("Error checking bridges on destination node '%s'" %
1675 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1678 def _CheckInstanceBridgesExist(lu, instance, node=None):
1679 """Check that the brigdes needed by an instance exist.
1683 node = instance.primary_node
1684 _CheckNicsBridgesExist(lu, instance.nics, node)
1687 def _CheckOSVariant(os_obj, name):
1688 """Check whether an OS name conforms to the os variants specification.
1690 @type os_obj: L{objects.OS}
1691 @param os_obj: OS object to check
1693 @param name: OS name passed by the user, to check for validity
1696 variant = objects.OS.GetVariant(name)
1697 if not os_obj.supported_variants:
1699 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1700 " passed)" % (os_obj.name, variant),
1704 raise errors.OpPrereqError("OS name must include a variant",
1707 if variant not in os_obj.supported_variants:
1708 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1711 def _GetNodeInstancesInner(cfg, fn):
1712 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1715 def _GetNodeInstances(cfg, node_name):
1716 """Returns a list of all primary and secondary instances on a node.
1720 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1723 def _GetNodePrimaryInstances(cfg, node_name):
1724 """Returns primary instances on a node.
1727 return _GetNodeInstancesInner(cfg,
1728 lambda inst: node_name == inst.primary_node)
1731 def _GetNodeSecondaryInstances(cfg, node_name):
1732 """Returns secondary instances on a node.
1735 return _GetNodeInstancesInner(cfg,
1736 lambda inst: node_name in inst.secondary_nodes)
1739 def _GetStorageTypeArgs(cfg, storage_type):
1740 """Returns the arguments for a storage type.
1743 # Special case for file storage
1744 if storage_type == constants.ST_FILE:
1745 # storage.FileStorage wants a list of storage directories
1746 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1751 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1754 for dev in instance.disks:
1755 cfg.SetDiskID(dev, node_name)
1757 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1759 result.Raise("Failed to get disk status from node %s" % node_name,
1760 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1762 for idx, bdev_status in enumerate(result.payload):
1763 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1769 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1770 """Check the sanity of iallocator and node arguments and use the
1771 cluster-wide iallocator if appropriate.
1773 Check that at most one of (iallocator, node) is specified. If none is
1774 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1775 then the LU's opcode's iallocator slot is filled with the cluster-wide
1778 @type iallocator_slot: string
1779 @param iallocator_slot: the name of the opcode iallocator slot
1780 @type node_slot: string
1781 @param node_slot: the name of the opcode target node slot
1784 node = getattr(lu.op, node_slot, None)
1785 ialloc = getattr(lu.op, iallocator_slot, None)
1789 if node is not None and ialloc is not None:
1790 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1792 elif ((node is None and ialloc is None) or
1793 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1794 default_iallocator = lu.cfg.GetDefaultIAllocator()
1795 if default_iallocator:
1796 setattr(lu.op, iallocator_slot, default_iallocator)
1798 raise errors.OpPrereqError("No iallocator or node given and no"
1799 " cluster-wide default iallocator found;"
1800 " please specify either an iallocator or a"
1801 " node, or set a cluster-wide default"
1802 " iallocator", errors.ECODE_INVAL)
1805 def _GetDefaultIAllocator(cfg, ialloc):
1806 """Decides on which iallocator to use.
1808 @type cfg: L{config.ConfigWriter}
1809 @param cfg: Cluster configuration object
1810 @type ialloc: string or None
1811 @param ialloc: Iallocator specified in opcode
1813 @return: Iallocator name
1817 # Use default iallocator
1818 ialloc = cfg.GetDefaultIAllocator()
1821 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1822 " opcode nor as a cluster-wide default",
1828 def _CheckHostnameSane(lu, name):
1829 """Ensures that a given hostname resolves to a 'sane' name.
1831 The given name is required to be a prefix of the resolved hostname,
1832 to prevent accidental mismatches.
1834 @param lu: the logical unit on behalf of which we're checking
1835 @param name: the name we should resolve and check
1836 @return: the resolved hostname object
1839 hostname = netutils.GetHostname(name=name)
1840 if hostname.name != name:
1841 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1842 if not utils.MatchNameComponent(name, [hostname.name]):
1843 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1844 " same as given hostname '%s'") %
1845 (hostname.name, name), errors.ECODE_INVAL)
1849 class LUClusterPostInit(LogicalUnit):
1850 """Logical unit for running hooks after cluster initialization.
1853 HPATH = "cluster-init"
1854 HTYPE = constants.HTYPE_CLUSTER
1856 def BuildHooksEnv(self):
1861 "OP_TARGET": self.cfg.GetClusterName(),
1864 def BuildHooksNodes(self):
1865 """Build hooks nodes.
1868 return ([], [self.cfg.GetMasterNode()])
1870 def Exec(self, feedback_fn):
1877 class LUClusterDestroy(LogicalUnit):
1878 """Logical unit for destroying the cluster.
1881 HPATH = "cluster-destroy"
1882 HTYPE = constants.HTYPE_CLUSTER
1884 def BuildHooksEnv(self):
1889 "OP_TARGET": self.cfg.GetClusterName(),
1892 def BuildHooksNodes(self):
1893 """Build hooks nodes.
1898 def CheckPrereq(self):
1899 """Check prerequisites.
1901 This checks whether the cluster is empty.
1903 Any errors are signaled by raising errors.OpPrereqError.
1906 master = self.cfg.GetMasterNode()
1908 nodelist = self.cfg.GetNodeList()
1909 if len(nodelist) != 1 or nodelist[0] != master:
1910 raise errors.OpPrereqError("There are still %d node(s) in"
1911 " this cluster." % (len(nodelist) - 1),
1913 instancelist = self.cfg.GetInstanceList()
1915 raise errors.OpPrereqError("There are still %d instance(s) in"
1916 " this cluster." % len(instancelist),
1919 def Exec(self, feedback_fn):
1920 """Destroys the cluster.
1923 master_params = self.cfg.GetMasterNetworkParameters()
1925 # Run post hooks on master node before it's removed
1926 _RunPostHook(self, master_params.name)
1928 ems = self.cfg.GetUseExternalMipScript()
1929 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1932 self.LogWarning("Error disabling the master IP address: %s",
1935 return master_params.name
1938 def _VerifyCertificate(filename):
1939 """Verifies a certificate for L{LUClusterVerifyConfig}.
1941 @type filename: string
1942 @param filename: Path to PEM file
1946 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1947 utils.ReadFile(filename))
1948 except Exception, err: # pylint: disable=W0703
1949 return (LUClusterVerifyConfig.ETYPE_ERROR,
1950 "Failed to load X509 certificate %s: %s" % (filename, err))
1953 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1954 constants.SSL_CERT_EXPIRATION_ERROR)
1957 fnamemsg = "While verifying %s: %s" % (filename, msg)
1962 return (None, fnamemsg)
1963 elif errcode == utils.CERT_WARNING:
1964 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1965 elif errcode == utils.CERT_ERROR:
1966 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1968 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
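# Illustrative call, mirroring the loop in LUClusterVerifyConfig.Exec below:
#   (errcode, msg) = _VerifyCertificate(cert_filename)
# errcode is None for a healthy certificate, or one of
# LUClusterVerifyConfig.ETYPE_WARNING/ETYPE_ERROR together with a message.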
1971 def _GetAllHypervisorParameters(cluster, instances):
1972 """Compute the set of all hypervisor parameters.
1974 @type cluster: L{objects.Cluster}
1975 @param cluster: the cluster object
1976 @type instances: list of L{objects.Instance}
1977 @param instances: additional instances from which to obtain parameters
1978 @rtype: list of (origin, hypervisor, parameters)
1979 @return: a list with all parameters found, indicating the hypervisor they
1980 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1985 for hv_name in cluster.enabled_hypervisors:
1986 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1988 for os_name, os_hvp in cluster.os_hvp.items():
1989 for hv_name, hv_params in os_hvp.items():
1991 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1992 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1994 # TODO: collapse identical parameter values into a single one
1995 for instance in instances:
1996 if instance.hvparams:
1997 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1998 cluster.FillHV(instance)))
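# The resulting hvp_data is a flat list of 3-tuples, e.g. (illustrative
# values):
#   [("cluster", "kvm", {...cluster defaults...}),
#    ("os myos", "kvm", {...OS-level overrides filled in...}),
#    ("instance web1", "kvm", {...fully filled instance parameters...})]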
2003 class _VerifyErrors(object):
2004 """Mix-in for cluster/group verify LUs.
2006 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2007 self.op and self._feedback_fn to be available.)
2011 ETYPE_FIELD = "code"
2012 ETYPE_ERROR = "ERROR"
2013 ETYPE_WARNING = "WARNING"
2015 def _Error(self, ecode, item, msg, *args, **kwargs):
2016 """Format an error message.
2018 Based on the opcode's error_codes parameter, either format a
2019 parseable error code, or a simpler error string.
2021 This must be called only from Exec and functions called from Exec.
2024 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2025 itype, etxt, _ = ecode
2026 # If the error code is in the list of ignored errors, demote the error to a warning
2028 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2029 ltype = self.ETYPE_WARNING
2030 # first complete the msg
2033 # then format the whole message
2034 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2035 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2041 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2042 # and finally report it via the feedback_fn
2043 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2044 # only mark the operation as failed for errors, not for warnings
2045 if ltype == self.ETYPE_ERROR:
2048 def _ErrorIf(self, cond, *args, **kwargs):
2049 """Log an error message if the passed condition is True.
2053 or self.op.debug_simulate_errors): # pylint: disable=E1101
2054 self._Error(*args, **kwargs)
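# Typical mix-in usage, as seen throughout the verify LUs below
# (illustrative):
#   self._ErrorIf(test, constants.CV_ENODERPC, node, "message: %s", detail)
# with an optional code=self.ETYPE_WARNING keyword to demote the severity.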
2057 class LUClusterVerify(NoHooksLU):
2058 """Submits all jobs necessary to verify the cluster.
2063 def ExpandNames(self):
2064 self.needed_locks = {}
2066 def Exec(self, feedback_fn):
2069 if self.op.group_name:
2070 groups = [self.op.group_name]
2071 depends_fn = lambda: None
2073 groups = self.cfg.GetNodeGroupList()
2075 # Verify global configuration
2077 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2080 # Always depend on global verification
2081 depends_fn = lambda: [(-len(jobs), [])]
2084 [opcodes.OpClusterVerifyGroup(group_name=group,
2085 ignore_errors=self.op.ignore_errors,
2086 depends=depends_fn())]
2087 for group in groups)
2089 # Fix up all parameters
2090 for op in itertools.chain(*jobs): # pylint: disable=W0142
2091 op.debug_simulate_errors = self.op.debug_simulate_errors
2092 op.verbose = self.op.verbose
2093 op.error_codes = self.op.error_codes
2095 op.skip_checks = self.op.skip_checks
2096 except AttributeError:
2097 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2099 return ResultWithJobs(jobs)
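# The submitted jobs have the shape [[OpClusterVerifyConfig],
# [OpClusterVerifyGroup(g1)], [OpClusterVerifyGroup(g2)], ...]: one
# single-opcode job per group, each depending (via depends_fn) on the global
# configuration verification when a whole-cluster verify was requested.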
2102 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2103 """Verifies the cluster config.
2108 def _VerifyHVP(self, hvp_data):
2109 """Verifies locally the syntax of the hypervisor parameters.
2112 for item, hv_name, hv_params in hvp_data:
2113 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2116 hv_class = hypervisor.GetHypervisorClass(hv_name)
2117 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2118 hv_class.CheckParameterSyntax(hv_params)
2119 except errors.GenericError, err:
2120 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2122 def ExpandNames(self):
2123 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2124 self.share_locks = _ShareAll()
2126 def CheckPrereq(self):
2127 """Check prerequisites.
2130 # Retrieve all information
2131 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2132 self.all_node_info = self.cfg.GetAllNodesInfo()
2133 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2135 def Exec(self, feedback_fn):
2136 """Verify integrity of cluster, performing various test on nodes.
2140 self._feedback_fn = feedback_fn
2142 feedback_fn("* Verifying cluster config")
2144 for msg in self.cfg.VerifyConfig():
2145 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2147 feedback_fn("* Verifying cluster certificate files")
2149 for cert_filename in pathutils.ALL_CERT_FILES:
2150 (errcode, msg) = _VerifyCertificate(cert_filename)
2151 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2153 feedback_fn("* Verifying hypervisor parameters")
2155 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2156 self.all_inst_info.values()))
2158 feedback_fn("* Verifying all nodes belong to an existing group")
2160 # We do this verification here because, should this bogus circumstance
2161 # occur, it would never be caught by VerifyGroup, which only acts on
2162 # nodes/instances reachable from existing node groups.
2164 dangling_nodes = set(node.name for node in self.all_node_info.values()
2165 if node.group not in self.all_group_info)
2167 dangling_instances = {}
2168 no_node_instances = []
2170 for inst in self.all_inst_info.values():
2171 if inst.primary_node in dangling_nodes:
2172 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2173 elif inst.primary_node not in self.all_node_info:
2174 no_node_instances.append(inst.name)
2179 utils.CommaJoin(dangling_instances.get(node.name,
2181 for node in dangling_nodes]
2183 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2185 "the following nodes (and their instances) belong to a non"
2186 " existing group: %s", utils.CommaJoin(pretty_dangling))
2188 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2190 "the following instances have a non-existing primary-node:"
2191 " %s", utils.CommaJoin(no_node_instances))
2196 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2197 """Verifies the status of a node group.
2200 HPATH = "cluster-verify"
2201 HTYPE = constants.HTYPE_CLUSTER
2204 _HOOKS_INDENT_RE = re.compile("^", re.M)
2206 class NodeImage(object):
2207 """A class representing the logical and physical status of a node.
2210 @ivar name: the node name to which this object refers
2211 @ivar volumes: a structure as returned from
2212 L{ganeti.backend.GetVolumeList} (runtime)
2213 @ivar instances: a list of running instances (runtime)
2214 @ivar pinst: list of configured primary instances (config)
2215 @ivar sinst: list of configured secondary instances (config)
2216 @ivar sbp: dictionary of {primary-node: list of instances} for all
2217 instances for which this node is secondary (config)
2218 @ivar mfree: free memory, as reported by hypervisor (runtime)
2219 @ivar dfree: free disk, as reported by the node (runtime)
2220 @ivar offline: the offline status (config)
2221 @type rpc_fail: boolean
2222 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2223 not whether the individual keys were correct) (runtime)
2224 @type lvm_fail: boolean
2225 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2226 @type hyp_fail: boolean
2227 @ivar hyp_fail: whether the RPC call didn't return the instance list
2228 @type ghost: boolean
2229 @ivar ghost: whether this is a known node or not (config)
2230 @type os_fail: boolean
2231 @ivar os_fail: whether the RPC call didn't return valid OS data
2233 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2234 @type vm_capable: boolean
2235 @ivar vm_capable: whether the node can host instances
2237 @ivar pv_min: size in MiB of the smallest PVs
2239 @ivar pv_max: size in MiB of the biggest PVs
2242 def __init__(self, offline=False, name=None, vm_capable=True):
2251 self.offline = offline
2252 self.vm_capable = vm_capable
2253 self.rpc_fail = False
2254 self.lvm_fail = False
2255 self.hyp_fail = False
2257 self.os_fail = False
2262 def ExpandNames(self):
2263 # This raises errors.OpPrereqError on its own:
2264 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2266 # Get instances in node group; this is unsafe and needs verification later
2268 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2270 self.needed_locks = {
2271 locking.LEVEL_INSTANCE: inst_names,
2272 locking.LEVEL_NODEGROUP: [self.group_uuid],
2273 locking.LEVEL_NODE: [],
2275 # This opcode is run by watcher every five minutes and acquires all nodes
2276 # for a group. It doesn't run for a long time, so it's better to acquire
2277 # the node allocation lock as well.
2278 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2281 self.share_locks = _ShareAll()
2283 def DeclareLocks(self, level):
2284 if level == locking.LEVEL_NODE:
2285 # Get members of node group; this is unsafe and needs verification later
2286 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2288 all_inst_info = self.cfg.GetAllInstancesInfo()
2290 # In Exec(), we warn about mirrored instances that have primary and
2291 # secondary living in separate node groups. To fully verify that
2292 # volumes for these instances are healthy, we will need to do an
2293 # extra call to their secondaries. We ensure here those nodes will be locked.
2295 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2296 # Important: access only the instances whose lock is owned
2297 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2298 nodes.update(all_inst_info[inst].secondary_nodes)
2300 self.needed_locks[locking.LEVEL_NODE] = nodes
2302 def CheckPrereq(self):
2303 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2304 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2306 group_nodes = set(self.group_info.members)
2308 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2311 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2313 unlocked_instances = \
2314 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2317 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2318 utils.CommaJoin(unlocked_nodes),
2321 if unlocked_instances:
2322 raise errors.OpPrereqError("Missing lock for instances: %s" %
2323 utils.CommaJoin(unlocked_instances),
2326 self.all_node_info = self.cfg.GetAllNodesInfo()
2327 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2329 self.my_node_names = utils.NiceSort(group_nodes)
2330 self.my_inst_names = utils.NiceSort(group_instances)
2332 self.my_node_info = dict((name, self.all_node_info[name])
2333 for name in self.my_node_names)
2335 self.my_inst_info = dict((name, self.all_inst_info[name])
2336 for name in self.my_inst_names)
2338 # We detect here the nodes that will need the extra RPC calls for verifying
2339 # split LV volumes; they should be locked.
2340 extra_lv_nodes = set()
2342 for inst in self.my_inst_info.values():
2343 if inst.disk_template in constants.DTS_INT_MIRROR:
2344 for nname in inst.all_nodes:
2345 if self.all_node_info[nname].group != self.group_uuid:
2346 extra_lv_nodes.add(nname)
2348 unlocked_lv_nodes = \
2349 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2351 if unlocked_lv_nodes:
2352 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2353 utils.CommaJoin(unlocked_lv_nodes),
2355 self.extra_lv_nodes = list(extra_lv_nodes)
2357 def _VerifyNode(self, ninfo, nresult):
2358 """Perform some basic validation on data returned from a node.
2360 - check the result data structure is well formed and has all the mandatory fields
2362 - check ganeti version
2364 @type ninfo: L{objects.Node}
2365 @param ninfo: the node to check
2366 @param nresult: the results from the node
2368 @return: whether overall this call was successful (and we can expect
2369 reasonable values in the response)
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 # main result, nresult should be a non-empty dict
2376 test = not nresult or not isinstance(nresult, dict)
2377 _ErrorIf(test, constants.CV_ENODERPC, node,
2378 "unable to verify node: no data returned")
2382 # compares ganeti version
2383 local_version = constants.PROTOCOL_VERSION
2384 remote_version = nresult.get("version", None)
2385 test = not (remote_version and
2386 isinstance(remote_version, (list, tuple)) and
2387 len(remote_version) == 2)
2388 _ErrorIf(test, constants.CV_ENODERPC, node,
2389 "connection to node returned invalid data")
2393 test = local_version != remote_version[0]
2394 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2395 "incompatible protocol versions: master %s,"
2396 " node %s", local_version, remote_version[0])
2400 # node seems compatible, we can actually try to look into its results
2402 # full package version
2403 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2404 constants.CV_ENODEVERSION, node,
2405 "software version mismatch: master %s, node %s",
2406 constants.RELEASE_VERSION, remote_version[1],
2407 code=self.ETYPE_WARNING)
2409 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2410 if ninfo.vm_capable and isinstance(hyp_result, dict):
2411 for hv_name, hv_result in hyp_result.iteritems():
2412 test = hv_result is not None
2413 _ErrorIf(test, constants.CV_ENODEHV, node,
2414 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2416 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2417 if ninfo.vm_capable and isinstance(hvp_result, list):
2418 for item, hv_name, hv_result in hvp_result:
2419 _ErrorIf(True, constants.CV_ENODEHV, node,
2420 "hypervisor %s parameter verify failure (source %s): %s",
2421 hv_name, item, hv_result)
2423 test = nresult.get(constants.NV_NODESETUP,
2424 ["Missing NODESETUP results"])
2425 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2430 def _VerifyNodeTime(self, ninfo, nresult,
2431 nvinfo_starttime, nvinfo_endtime):
2432 """Check the node time.
2434 @type ninfo: L{objects.Node}
2435 @param ninfo: the node to check
2436 @param nresult: the remote results for the node
2437 @param nvinfo_starttime: the start time of the RPC call
2438 @param nvinfo_endtime: the end time of the RPC call
2442 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444 ntime = nresult.get(constants.NV_TIME, None)
2446 ntime_merged = utils.MergeTime(ntime)
2447 except (ValueError, TypeError):
2448 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2451 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2452 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2453 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2454 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2458 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2459 "Node time diverges by at least %s from master node time",
2462 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2463 """Check the node LVM results and update info for cross-node checks.
2465 @type ninfo: L{objects.Node}
2466 @param ninfo: the node to check
2467 @param nresult: the remote results for the node
2468 @param vg_name: the configured VG name
2469 @type nimg: L{NodeImage}
2470 @param nimg: node image
2477 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2479 # checks vg existence and size > 20G
2480 vglist = nresult.get(constants.NV_VGLIST, None)
2482 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2484 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2485 constants.MIN_VG_SIZE)
2486 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2489 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2491 self._Error(constants.CV_ENODELVM, node, em)
2492 if pvminmax is not None:
2493 (nimg.pv_min, nimg.pv_max) = pvminmax
2495 def _VerifyGroupLVM(self, node_image, vg_name):
2496 """Check cross-node consistency in LVM.
2498 @type node_image: dict
2499 @param node_image: info about nodes, mapping from node to names to
2500 L{NodeImage} objects
2501 @param vg_name: the configured VG name
2507 # Only exclusive storage needs this kind of check
2508 if not self._exclusive_storage:
2511 # exclusive_storage wants all PVs to have the same size (approximately),
2512 # if the smallest and the biggest ones are okay, everything is fine.
2513 # pv_min is None iff pv_max is None
2514 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2517 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2518 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2519 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2520 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2521 "PV sizes differ too much in the group; smallest (%s MB) is"
2522 " on %s, biggest (%s MB) is on %s",
2523 pvmin, minnode, pvmax, maxnode)
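# Example (illustrative): with a smallest PV of 10240 MB on node A and a
# biggest PV of 20480 MB on node B, LvmExclusiveTestBadPvSizes considers the
# spread too large and CV_EGROUPDIFFERENTPVSIZE is reported for the group.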
2525 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2526 """Check the node bridges.
2528 @type ninfo: L{objects.Node}
2529 @param ninfo: the node to check
2530 @param nresult: the remote results for the node
2531 @param bridges: the expected list of bridges
2538 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2540 missing = nresult.get(constants.NV_BRIDGES, None)
2541 test = not isinstance(missing, list)
2542 _ErrorIf(test, constants.CV_ENODENET, node,
2543 "did not return valid bridge information")
2545 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2546 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2548 def _VerifyNodeUserScripts(self, ninfo, nresult):
2549 """Check the results of user scripts presence and executability on the node
2551 @type ninfo: L{objects.Node}
2552 @param ninfo: the node to check
2553 @param nresult: the remote results for the node
2558 test = constants.NV_USERSCRIPTS not in nresult
2559 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2560 "did not return user scripts information")
2562 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2564 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2565 "user scripts not present or not executable: %s" %
2566 utils.CommaJoin(sorted(broken_scripts)))
2568 def _VerifyNodeNetwork(self, ninfo, nresult):
2569 """Check the node network connectivity results.
2571 @type ninfo: L{objects.Node}
2572 @param ninfo: the node to check
2573 @param nresult: the remote results for the node
2577 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2579 test = constants.NV_NODELIST not in nresult
2580 _ErrorIf(test, constants.CV_ENODESSH, node,
2581 "node hasn't returned node ssh connectivity data")
2583 if nresult[constants.NV_NODELIST]:
2584 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2585 _ErrorIf(True, constants.CV_ENODESSH, node,
2586 "ssh communication with node '%s': %s", a_node, a_msg)
2588 test = constants.NV_NODENETTEST not in nresult
2589 _ErrorIf(test, constants.CV_ENODENET, node,
2590 "node hasn't returned node tcp connectivity data")
2592 if nresult[constants.NV_NODENETTEST]:
2593 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2595 _ErrorIf(True, constants.CV_ENODENET, node,
2596 "tcp communication with node '%s': %s",
2597 anode, nresult[constants.NV_NODENETTEST][anode])
2599 test = constants.NV_MASTERIP not in nresult
2600 _ErrorIf(test, constants.CV_ENODENET, node,
2601 "node hasn't returned node master IP reachability data")
2603 if not nresult[constants.NV_MASTERIP]:
2604 if node == self.master_node:
2605 msg = "the master node cannot reach the master IP (not configured?)"
2607 msg = "cannot reach the master IP"
2608 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2610 def _VerifyInstance(self, instance, inst_config, node_image,
2612 """Verify an instance.
2614 This function checks to see if the required block devices are
2615 available on the instance's node, and that the nodes are in the correct
2619 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2620 pnode = inst_config.primary_node
2621 pnode_img = node_image[pnode]
2622 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2624 node_vol_should = {}
2625 inst_config.MapLVsByNode(node_vol_should)
2627 cluster = self.cfg.GetClusterInfo()
2628 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2630 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2631 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2632 code=self.ETYPE_WARNING)
2634 for node in node_vol_should:
2635 n_img = node_image[node]
2636 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2637 # ignore missing volumes on offline or broken nodes
2639 for volume in node_vol_should[node]:
2640 test = volume not in n_img.volumes
2641 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2642 "volume %s missing on node %s", volume, node)
2644 if inst_config.admin_state == constants.ADMINST_UP:
2645 test = instance not in pnode_img.instances and not pnode_img.offline
2646 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2647 "instance not running on its primary node %s",
2649 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2650 "instance is marked as running and lives on offline node %s",
2653 diskdata = [(nname, success, status, idx)
2654 for (nname, disks) in diskstatus.items()
2655 for idx, (success, status) in enumerate(disks)]
2657 for nname, success, bdev_status, idx in diskdata:
2658 # the 'ghost node' construction in Exec() ensures that we have a node image entry here
2660 snode = node_image[nname]
2661 bad_snode = snode.ghost or snode.offline
2662 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2663 not success and not bad_snode,
2664 constants.CV_EINSTANCEFAULTYDISK, instance,
2665 "couldn't retrieve status for disk/%s on %s: %s",
2666 idx, nname, bdev_status)
2667 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2668 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2669 constants.CV_EINSTANCEFAULTYDISK, instance,
2670 "disk/%s on %s is faulty", idx, nname)
2672 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2673 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2674 " primary node failed", instance)
2676 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2677 constants.CV_EINSTANCELAYOUT,
2678 instance, "instance has multiple secondary nodes: %s",
2679 utils.CommaJoin(inst_config.secondary_nodes),
2680 code=self.ETYPE_WARNING)
2682 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2683 # Disk template not compatible with exclusive_storage: no instance
2684 # node should have the flag set
2685 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2686 inst_config.all_nodes)
2687 es_nodes = [n for (n, es) in es_flags.items()
2689 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2690 "instance has template %s, which is not supported on nodes"
2691 " that have exclusive storage set: %s",
2692 inst_config.disk_template, utils.CommaJoin(es_nodes))
2694 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2695 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2696 instance_groups = {}
2698 for node in instance_nodes:
2699 instance_groups.setdefault(self.all_node_info[node].group,
2703 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2704 # Sort so that we always list the primary node first.
2705 for group, nodes in sorted(instance_groups.items(),
2706 key=lambda (_, nodes): pnode in nodes,
2709 self._ErrorIf(len(instance_groups) > 1,
2710 constants.CV_EINSTANCESPLITGROUPS,
2711 instance, "instance has primary and secondary nodes in"
2712 " different groups: %s", utils.CommaJoin(pretty_list),
2713 code=self.ETYPE_WARNING)
2715 inst_nodes_offline = []
2716 for snode in inst_config.secondary_nodes:
2717 s_img = node_image[snode]
2718 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2719 snode, "instance %s, connection to secondary node failed",
2723 inst_nodes_offline.append(snode)
2725 # warn that the instance lives on offline nodes
2726 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2727 "instance has offline secondary node(s) %s",
2728 utils.CommaJoin(inst_nodes_offline))
2729 # ... or ghost/non-vm_capable nodes
2730 for node in inst_config.all_nodes:
2731 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2732 instance, "instance lives on ghost node %s", node)
2733 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2734 instance, "instance lives on non-vm_capable node %s", node)
2736 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2737 """Verify if there are any unknown volumes in the cluster.
2739 The .os, .swap and backup volumes are ignored. All other volumes are
2740 reported as unknown.
2742 @type reserved: L{ganeti.utils.FieldSet}
2743 @param reserved: a FieldSet of reserved volume names
2746 for node, n_img in node_image.items():
2747 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2748 self.all_node_info[node].group != self.group_uuid):
2749 # skip non-healthy nodes
2751 for volume in n_img.volumes:
2752 test = ((node not in node_vol_should or
2753 volume not in node_vol_should[node]) and
2754 not reserved.Matches(volume))
2755 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2756 "volume %s is unknown", volume)
2758 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2759 """Verify N+1 Memory Resilience.
2761 Check that if one single node dies we can still start all the
2762 instances it was primary for.
2765 cluster_info = self.cfg.GetClusterInfo()
2766 for node, n_img in node_image.items():
2767 # This code checks that every node which is now listed as
2768 # secondary has enough memory to host all instances it is
2769 # supposed to, should a single other node in the cluster fail.
2770 # FIXME: not ready for failover to an arbitrary node
2771 # FIXME: does not support file-backed instances
2772 # WARNING: we currently take into account down instances as well
2773 # as up ones, considering that even if they're down someone
2774 # might want to start them even in the event of a node failure.
2775 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2776 # we're skipping nodes marked offline and nodes in other groups from
2777 # the N+1 warning, since most likely we don't have good memory
2778 # information from them; we already list instances living on such
2779 # nodes, and that's enough warning
2781 #TODO(dynmem): also consider ballooning out other instances
2782 for prinode, instances in n_img.sbp.items():
2784 for instance in instances:
2785 bep = cluster_info.FillBE(instance_cfg[instance])
2786 if bep[constants.BE_AUTO_BALANCE]:
2787 needed_mem += bep[constants.BE_MINMEM]
2788 test = n_img.mfree < needed_mem
2789 self._ErrorIf(test, constants.CV_ENODEN1, node,
2790 "not enough memory to accomodate instance failovers"
2791 " should node %s fail (%dMiB needed, %dMiB available)",
2792 prinode, needed_mem, n_img.mfree)
2795 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2796 (files_all, files_opt, files_mc, files_vm)):
2797 """Verifies file checksums collected from all nodes.
2799 @param errorif: Callback for reporting errors
2800 @param nodeinfo: List of L{objects.Node} objects
2801 @param master_node: Name of master node
2802 @param all_nvinfo: RPC results
2805 # Define functions determining which nodes to consider for a file
2808 (files_mc, lambda node: (node.master_candidate or
2809 node.name == master_node)),
2810 (files_vm, lambda node: node.vm_capable),
2813 # Build mapping from filename to list of nodes which should have the file
2815 for (files, fn) in files2nodefn:
2817 filenodes = nodeinfo
2819 filenodes = filter(fn, nodeinfo)
2820 nodefiles.update((filename,
2821 frozenset(map(operator.attrgetter("name"), filenodes)))
2822 for filename in files)
2824 assert set(nodefiles) == (files_all | files_mc | files_vm)
2826 fileinfo = dict((filename, {}) for filename in nodefiles)
2827 ignore_nodes = set()
2829 for node in nodeinfo:
2831 ignore_nodes.add(node.name)
2834 nresult = all_nvinfo[node.name]
2836 if nresult.fail_msg or not nresult.payload:
2839 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2840 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2841 for (key, value) in fingerprints.items())
2844 test = not (node_files and isinstance(node_files, dict))
2845 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2846 "Node did not return file checksum data")
2848 ignore_nodes.add(node.name)
2851 # Build per-checksum mapping from filename to nodes having it
2852 for (filename, checksum) in node_files.items():
2853 assert filename in nodefiles
2854 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2856 for (filename, checksums) in fileinfo.items():
2857 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2859 # Nodes having the file
2860 with_file = frozenset(node_name
2861 for nodes in fileinfo[filename].values()
2862 for node_name in nodes) - ignore_nodes
2864 expected_nodes = nodefiles[filename] - ignore_nodes
2866 # Nodes missing file
2867 missing_file = expected_nodes - with_file
2869 if filename in files_opt:
2871 errorif(missing_file and missing_file != expected_nodes,
2872 constants.CV_ECLUSTERFILECHECK, None,
2873 "File %s is optional, but it must exist on all or no"
2874 " nodes (not found on %s)",
2875 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2877 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2878 "File %s is missing from node(s) %s", filename,
2879 utils.CommaJoin(utils.NiceSort(missing_file)))
2881 # Warn if a node has a file it shouldn't
2882 unexpected = with_file - expected_nodes
2884 constants.CV_ECLUSTERFILECHECK, None,
2885 "File %s should not exist on node(s) %s",
2886 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2888 # See if there are multiple versions of the file
2889 test = len(checksums) > 1
2891 variants = ["variant %s on %s" %
2892 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2893 for (idx, (checksum, nodes)) in
2894 enumerate(sorted(checksums.items()))]
2898 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2899 "File %s found with %s different checksums (%s)",
2900 filename, len(checksums), "; ".join(variants))
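# Example message (illustrative): a file present with checksum A on n1/n2 and
# checksum B on n3 is reported roughly as
#   "File /path found with 2 different checksums
#    (variant 1 on n1, n2; variant 2 on n3)"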
2902 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2904 """Verifies and the node DRBD status.
2906 @type ninfo: L{objects.Node}
2907 @param ninfo: the node to check
2908 @param nresult: the remote results for the node
2909 @param instanceinfo: the dict of instances
2910 @param drbd_helper: the configured DRBD usermode helper
2911 @param drbd_map: the DRBD map as returned by
2912 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2916 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2919 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2920 test = (helper_result is None)
2921 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2922 "no drbd usermode helper returned")
2924 status, payload = helper_result
2926 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2927 "drbd usermode helper check unsuccessful: %s", payload)
2928 test = status and (payload != drbd_helper)
2929 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2930 "wrong drbd usermode helper: %s", payload)
2932 # compute the DRBD minors
2934 for minor, instance in drbd_map[node].items():
2935 test = instance not in instanceinfo
2936 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2937 "ghost instance '%s' in temporary DRBD map", instance)
2938 # ghost instance should not be running, but otherwise we
2939 # don't give double warnings (both ghost instance and
2940 # unallocated minor in use)
2942 node_drbd[minor] = (instance, False)
2944 instance = instanceinfo[instance]
2945 node_drbd[minor] = (instance.name,
2946 instance.admin_state == constants.ADMINST_UP)
2948 # and now check them
2949 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2950 test = not isinstance(used_minors, (tuple, list))
2951 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2952 "cannot parse drbd status file: %s", str(used_minors))
2954 # we cannot check drbd status
2957 for minor, (iname, must_exist) in node_drbd.items():
2958 test = minor not in used_minors and must_exist
2959 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2960 "drbd minor %d of instance %s is not active", minor, iname)
2961 for minor in used_minors:
2962 test = minor not in node_drbd
2963 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2964 "unallocated drbd minor %d is in use", minor)
2966 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2967 """Builds the node OS structures.
2969 @type ninfo: L{objects.Node}
2970 @param ninfo: the node to check
2971 @param nresult: the remote results for the node
2972 @param nimg: the node image object
2976 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2978 remote_os = nresult.get(constants.NV_OSLIST, None)
2979 test = (not isinstance(remote_os, list) or
2980 not compat.all(isinstance(v, list) and len(v) == 7
2981 for v in remote_os))
2983 _ErrorIf(test, constants.CV_ENODEOS, node,
2984 "node hasn't returned valid OS data")
2993 for (name, os_path, status, diagnose,
2994 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2996 if name not in os_dict:
2999 # parameters is a list of lists instead of list of tuples due to
3000 # JSON lacking a real tuple type, fix it:
3001 parameters = [tuple(v) for v in parameters]
3002 os_dict[name].append((os_path, status, diagnose,
3003 set(variants), set(parameters), set(api_ver)))
3005 nimg.oslist = os_dict
3007 def _VerifyNodeOS(self, ninfo, nimg, base):
3008 """Verifies the node OS list.
3010 @type ninfo: L{objects.Node}
3011 @param ninfo: the node to check
3012 @param nimg: the node image object
3013 @param base: the 'template' node we match against (e.g. from the master)
3017 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3019 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3021 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3022 for os_name, os_data in nimg.oslist.items():
3023 assert os_data, "Empty OS status for OS %s?!" % os_name
3024 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3025 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3026 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3027 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3028 "OS '%s' has multiple entries (first one shadows the rest): %s",
3029 os_name, utils.CommaJoin([v[0] for v in os_data]))
3030 # comparisons with the 'base' image
3031 test = os_name not in base.oslist
3032 _ErrorIf(test, constants.CV_ENODEOS, node,
3033 "Extra OS %s not present on reference node (%s)",
3037 assert base.oslist[os_name], "Base node has empty OS status?"
3038 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3040 # base OS is invalid, skipping
3042 for kind, a, b in [("API version", f_api, b_api),
3043 ("variants list", f_var, b_var),
3044 ("parameters", beautify_params(f_param),
3045 beautify_params(b_param))]:
3046 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3047 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3048 kind, os_name, base.name,
3049 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3051 # check any missing OSes
3052 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3053 _ErrorIf(missing, constants.CV_ENODEOS, node,
3054 "OSes present on reference node %s but missing on this node: %s",
3055 base.name, utils.CommaJoin(missing))
3057 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3058 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3060 @type ninfo: L{objects.Node}
3061 @param ninfo: the node to check
3062 @param nresult: the remote results for the node
3063 @type is_master: bool
3064 @param is_master: Whether node is the master node
3070 (constants.ENABLE_FILE_STORAGE or
3071 constants.ENABLE_SHARED_FILE_STORAGE)):
3073 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3075 # This should never happen
3076 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3077 "Node did not return forbidden file storage paths")
3079 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3080 "Found forbidden file storage paths: %s",
3081 utils.CommaJoin(fspaths))
3083 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3084 constants.CV_ENODEFILESTORAGEPATHS, node,
3085 "Node should not have returned forbidden file storage"
3088 def _VerifyOob(self, ninfo, nresult):
3089 """Verifies out of band functionality of a node.
3091 @type ninfo: L{objects.Node}
3092 @param ninfo: the node to check
3093 @param nresult: the remote results for the node
3097 # We just have to verify the paths on master and/or master candidates
3098 # as the oob helper is invoked on the master
3099 if ((ninfo.master_candidate or ninfo.master_capable) and
3100 constants.NV_OOB_PATHS in nresult):
3101 for path_result in nresult[constants.NV_OOB_PATHS]:
3102 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3104 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3105 """Verifies and updates the node volume data.
3107 This function will update a L{NodeImage}'s internal structures
3108 with data from the remote call.
3110 @type ninfo: L{objects.Node}
3111 @param ninfo: the node to check
3112 @param nresult: the remote results for the node
3113 @param nimg: the node image object
3114 @param vg_name: the configured VG name
3118 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3120 nimg.lvm_fail = True
3121 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3124 elif isinstance(lvdata, basestring):
3125 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3126 utils.SafeEncode(lvdata))
3127 elif not isinstance(lvdata, dict):
3128 _ErrorIf(True, constants.CV_ENODELVM, node,
3129 "rpc call to node failed (lvlist)")
3131 nimg.volumes = lvdata
3132 nimg.lvm_fail = False
3134 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3135 """Verifies and updates the node instance list.
3137 If the listing was successful, then updates this node's instance
3138 list. Otherwise, it marks the RPC call as failed for the instance
3141 @type ninfo: L{objects.Node}
3142 @param ninfo: the node to check
3143 @param nresult: the remote results for the node
3144 @param nimg: the node image object
3147 idata = nresult.get(constants.NV_INSTANCELIST, None)
3148 test = not isinstance(idata, list)
3149 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3150 "rpc call to node failed (instancelist): %s",
3151 utils.SafeEncode(str(idata)))
3153 nimg.hyp_fail = True
3155 nimg.instances = idata
3157 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3158 """Verifies and computes a node information map
3160 @type ninfo: L{objects.Node}
3161 @param ninfo: the node to check
3162 @param nresult: the remote results for the node
3163 @param nimg: the node image object
3164 @param vg_name: the configured VG name
3168 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3170 # try to read free memory (from the hypervisor)
3171 hv_info = nresult.get(constants.NV_HVINFO, None)
3172 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3173 _ErrorIf(test, constants.CV_ENODEHV, node,
3174 "rpc call to node failed (hvinfo)")
3177 nimg.mfree = int(hv_info["memory_free"])
3178 except (ValueError, TypeError):
3179 _ErrorIf(True, constants.CV_ENODERPC, node,
3180 "node returned invalid nodeinfo, check hypervisor")
3182 # FIXME: devise a free space model for file based instances as well
3183 if vg_name is not None:
3184 test = (constants.NV_VGLIST not in nresult or
3185 vg_name not in nresult[constants.NV_VGLIST])
3186 _ErrorIf(test, constants.CV_ENODELVM, node,
3187 "node didn't return data for the volume group '%s'"
3188 " - it is either missing or broken", vg_name)
3191 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3192 except (ValueError, TypeError):
3193 _ErrorIf(True, constants.CV_ENODERPC, node,
3194 "node returned invalid LVM info, check LVM status")
3196 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3197 """Gets per-disk status information for all instances.
3199 @type nodelist: list of strings
3200 @param nodelist: Node names
3201 @type node_image: dict of (name, L{objects.Node})
3202 @param node_image: Node objects
3203 @type instanceinfo: dict of (name, L{objects.Instance})
3204 @param instanceinfo: Instance objects
3205 @rtype: {instance: {node: [(success, payload)]}}
3206 @return: a dictionary of per-instance dictionaries with nodes as
3207 keys and disk information as values; the disk information is a
3208 list of tuples (success, payload)
3211 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3214 node_disks_devonly = {}
3215 diskless_instances = set()
3216 diskless = constants.DT_DISKLESS
3218 for nname in nodelist:
3219 node_instances = list(itertools.chain(node_image[nname].pinst,
3220 node_image[nname].sinst))
3221 diskless_instances.update(inst for inst in node_instances
3222 if instanceinfo[inst].disk_template == diskless)
3223 disks = [(inst, disk)
3224 for inst in node_instances
3225 for disk in instanceinfo[inst].disks]
3228 # No need to collect data
3231 node_disks[nname] = disks
3233 # _AnnotateDiskParams makes already copies of the disks
3235 for (inst, dev) in disks:
3236 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3237 self.cfg.SetDiskID(anno_disk, nname)
3238 devonly.append(anno_disk)
3240 node_disks_devonly[nname] = devonly
3242 assert len(node_disks) == len(node_disks_devonly)
3244 # Collect data from all nodes with disks
3245 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3248 assert len(result) == len(node_disks)
3252 for (nname, nres) in result.items():
3253 disks = node_disks[nname]
3256 # No data from this node
3257 data = len(disks) * [(False, "node offline")]
3260 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3261 "while getting disk information: %s", msg)
3263 # No data from this node
3264 data = len(disks) * [(False, msg)]
3267 for idx, i in enumerate(nres.payload):
3268 if isinstance(i, (tuple, list)) and len(i) == 2:
3271 logging.warning("Invalid result from node %s, entry %d: %s",
3273 data.append((False, "Invalid result from the remote node"))
3275 for ((inst, _), status) in zip(disks, data):
3276 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3278 # Add empty entries for diskless instances.
3279 for inst in diskless_instances:
3280 assert inst not in instdisk
3283 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3284 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3285 compat.all(isinstance(s, (tuple, list)) and
3286 len(s) == 2 for s in statuses)
3287 for inst, nnames in instdisk.items()
3288 for nname, statuses in nnames.items())
3290 instdisk_keys = set(instdisk)
3291 instanceinfo_keys = set(instanceinfo)
3292 assert instdisk_keys == instanceinfo_keys, \
3293 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3294 (instdisk_keys, instanceinfo_keys))
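# Resulting structure (illustrative):
#   instdisk = {"inst1": {"node1": [(True, status0), (False, "msg")], ...}}
# i.e. per-instance, per-node lists of (success, payload) tuples, with empty
# dictionaries left for diskless instances.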
3299 def _SshNodeSelector(group_uuid, all_nodes):
3300 """Create endless iterators for all potential SSH check hosts.
3303 nodes = [node for node in all_nodes
3304 if (node.group != group_uuid and
3306 keyfunc = operator.attrgetter("group")
3308 return map(itertools.cycle,
3309 [sorted(map(operator.attrgetter("name"), names))
3310 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3314 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3315 """Choose which nodes should talk to which other nodes.
3317 We will make nodes contact all nodes in their group, and one node from every other group.
3320 @warning: This algorithm has a known issue if one node group is much
3321 smaller than others (e.g. just one node). In such a case all other
3322 nodes will talk to the single node.
3325 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3326 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3328 return (online_nodes,
3329 dict((name, sorted([i.next() for i in sel]))
3330 for name in online_nodes))
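# Example (illustrative): with groups G1 = {n1, n2} and G2 = {n3}, each
# online node in G1 is told to contact all of G1 plus one node cycled from
# G2, so a single-node group ends up being contacted by every node of the
# larger groups (the caveat documented above).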
3332 def BuildHooksEnv(self):
3335 Cluster-Verify hooks are run only in the post phase; their failure is
3336 logged in the verify output and makes the verification fail.
3340 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3343 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3344 for node in self.my_node_info.values())
3348 def BuildHooksNodes(self):
3349 """Build hooks nodes.
3352 return ([], self.my_node_names)
3354 def Exec(self, feedback_fn):
3355 """Verify integrity of the node group, performing various test on nodes.
3358 # This method has too many local variables. pylint: disable=R0914
3359 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3361 if not self.my_node_names:
3363 feedback_fn("* Empty node group, skipping verification")
3367 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3368 verbose = self.op.verbose
3369 self._feedback_fn = feedback_fn
3371 vg_name = self.cfg.GetVGName()
3372 drbd_helper = self.cfg.GetDRBDHelper()
3373 cluster = self.cfg.GetClusterInfo()
3374 hypervisors = cluster.enabled_hypervisors
3375 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3377 i_non_redundant = [] # Non redundant instances
3378 i_non_a_balanced = [] # Non auto-balanced instances
3379 i_offline = 0 # Count of offline instances
3380 n_offline = 0 # Count of offline nodes
3381 n_drained = 0 # Count of nodes being drained
3382 node_vol_should = {}
3384 # FIXME: verify OS list
3387 filemap = _ComputeAncillaryFiles(cluster, False)
3389 # do local checksums
3390 master_node = self.master_node = self.cfg.GetMasterNode()
3391 master_ip = self.cfg.GetMasterIP()
3393 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3396 if self.cfg.GetUseExternalMipScript():
3397 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3399 node_verify_param = {
3400 constants.NV_FILELIST:
3401 map(vcluster.MakeVirtualPath,
3402 utils.UniqueSequence(filename
3403 for files in filemap
3404 for filename in files)),
3405 constants.NV_NODELIST:
3406 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3407 self.all_node_info.values()),
3408 constants.NV_HYPERVISOR: hypervisors,
3409 constants.NV_HVPARAMS:
3410 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3411 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3412 for node in node_data_list
3413 if not node.offline],
3414 constants.NV_INSTANCELIST: hypervisors,
3415 constants.NV_VERSION: None,
3416 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3417 constants.NV_NODESETUP: None,
3418 constants.NV_TIME: None,
3419 constants.NV_MASTERIP: (master_node, master_ip),
3420 constants.NV_OSLIST: None,
3421 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3422 constants.NV_USERSCRIPTS: user_scripts,
3425 if vg_name is not None:
3426 node_verify_param[constants.NV_VGLIST] = None
3427 node_verify_param[constants.NV_LVLIST] = vg_name
3428 node_verify_param[constants.NV_PVLIST] = [vg_name]
3431 node_verify_param[constants.NV_DRBDLIST] = None
3432 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3434 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3435 # Load file storage paths only from master node
3436 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3439 # FIXME: this needs to be changed per node-group, not cluster-wide
3441 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3442 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3443 bridges.add(default_nicpp[constants.NIC_LINK])
3444 for instance in self.my_inst_info.values():
3445 for nic in instance.nics:
3446 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3447 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3448 bridges.add(full_nic[constants.NIC_LINK])
3451 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3453 # Build our expected cluster state
3454 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3456 vm_capable=node.vm_capable))
3457 for node in node_data_list)
3461 for node in self.all_node_info.values():
3462 path = _SupportsOob(self.cfg, node)
3463 if path and path not in oob_paths:
3464 oob_paths.append(path)
3467 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3469 for instance in self.my_inst_names:
3470 inst_config = self.my_inst_info[instance]
3471 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3474 for nname in inst_config.all_nodes:
3475 if nname not in node_image:
3476 gnode = self.NodeImage(name=nname)
3477 gnode.ghost = (nname not in self.all_node_info)
3478 node_image[nname] = gnode
3480 inst_config.MapLVsByNode(node_vol_should)
3482 pnode = inst_config.primary_node
3483 node_image[pnode].pinst.append(instance)
3485 for snode in inst_config.secondary_nodes:
3486 nimg = node_image[snode]
3487 nimg.sinst.append(instance)
3488 if pnode not in nimg.sbp:
3489 nimg.sbp[pnode] = []
3490 nimg.sbp[pnode].append(instance)
3492 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3494 # The value of exclusive_storage should be the same across the group, so if
3495 # it's True for at least one node, we act as if it were set for all the nodes
3496 self._exclusive_storage = compat.any(es_flags.values())
3497 if self._exclusive_storage:
3498 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3499 es_unset_nodes = [n for (n, es) in es_flags.items()
3503 self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3504 "The exclusive_storage flag should be uniform in a group,"
3505 " but these nodes have it unset: %s",
3506 utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3507 self.LogWarning("Some checks required by exclusive storage will be"
3508 " performed also on nodes with the flag unset")
3510 # At this point, we have the in-memory data structures complete,
3511 # except for the runtime information, which we'll gather next
3513 # Due to the way our RPC system works, exact response times cannot be
3514 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3515 # time before and after executing the request, we can at least have a time window.
3517 nvinfo_starttime = time.time()
3518 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3520 self.cfg.GetClusterName())
3521 nvinfo_endtime = time.time()
3523 if self.extra_lv_nodes and vg_name is not None:
3525 self.rpc.call_node_verify(self.extra_lv_nodes,
3526 {constants.NV_LVLIST: vg_name},
3527 self.cfg.GetClusterName())
3529 extra_lv_nvinfo = {}
3531 all_drbd_map = self.cfg.ComputeDRBDMap()
3533 feedback_fn("* Gathering disk information (%s nodes)" %
3534 len(self.my_node_names))
3535 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3538 feedback_fn("* Verifying configuration file consistency")
3540 # If not all nodes are being checked, we need to make sure the master node
3541 # and a non-checked vm_capable node are in the list.
3542 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3544 vf_nvinfo = all_nvinfo.copy()
3545 vf_node_info = list(self.my_node_info.values())
3546 additional_nodes = []
3547 if master_node not in self.my_node_info:
3548 additional_nodes.append(master_node)
3549 vf_node_info.append(self.all_node_info[master_node])
3550 # Add the first vm_capable node we find which is not included,
3551 # excluding the master node (which we already have)
3552 for node in absent_nodes:
3553 nodeinfo = self.all_node_info[node]
3554 if (nodeinfo.vm_capable and not nodeinfo.offline and
3555 node != master_node):
3556 additional_nodes.append(node)
3557 vf_node_info.append(self.all_node_info[node])
3559 key = constants.NV_FILELIST
3560 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3561 {key: node_verify_param[key]},
3562 self.cfg.GetClusterName()))
3564 vf_nvinfo = all_nvinfo
3565 vf_node_info = self.my_node_info.values()
3567 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3569 feedback_fn("* Verifying node status")
3573 for node_i in node_data_list:
3575 nimg = node_image[node]
3579 feedback_fn("* Skipping offline node %s" % (node,))
3583 if node == master_node:
3585 elif node_i.master_candidate:
3586 ntype = "master candidate"
3587 elif node_i.drained:
3593 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3595 msg = all_nvinfo[node].fail_msg
3596 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3599 nimg.rpc_fail = True
3602 nresult = all_nvinfo[node].payload
3604 nimg.call_ok = self._VerifyNode(node_i, nresult)
3605 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3606 self._VerifyNodeNetwork(node_i, nresult)
3607 self._VerifyNodeUserScripts(node_i, nresult)
3608 self._VerifyOob(node_i, nresult)
3609 self._VerifyFileStoragePaths(node_i, nresult,
3610 node == master_node)
3613 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3614 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3617 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3618 self._UpdateNodeInstances(node_i, nresult, nimg)
3619 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3620 self._UpdateNodeOS(node_i, nresult, nimg)
3622 if not nimg.os_fail:
3623 if refos_img is None:
3625 self._VerifyNodeOS(node_i, nimg, refos_img)
3626 self._VerifyNodeBridges(node_i, nresult, bridges)
3628 # Check whether all running instances are primary for the node. (This
3629 # can no longer be done from _VerifyInstance below, since some of the
3630 # wrong instances could be from other node groups.)
3631 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3633 for inst in non_primary_inst:
3634 test = inst in self.all_inst_info
3635 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3636 "instance should not run on node %s", node_i.name)
3637 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3638 "node is running unknown instance %s", inst)
3640 self._VerifyGroupLVM(node_image, vg_name)
3642 for node, result in extra_lv_nvinfo.items():
3643 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3644 node_image[node], vg_name)
3646 feedback_fn("* Verifying instance status")
3647 for instance in self.my_inst_names:
3649 feedback_fn("* Verifying instance %s" % instance)
3650 inst_config = self.my_inst_info[instance]
3651 self._VerifyInstance(instance, inst_config, node_image,
3654 # If the instance is non-redundant we cannot survive losing its primary
3655 # node, so we are not N+1 compliant.
3656 if inst_config.disk_template not in constants.DTS_MIRRORED:
3657 i_non_redundant.append(instance)
3659 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3660 i_non_a_balanced.append(instance)
3662 feedback_fn("* Verifying orphan volumes")
3663 reserved = utils.FieldSet(*cluster.reserved_lvs)
3665 # We will get spurious "unknown volume" warnings if any node of this group
3666 # is secondary for an instance whose primary is in another group. To avoid
3667 # them, we find these instances and add their volumes to node_vol_should.
3668 for inst in self.all_inst_info.values():
3669 for secondary in inst.secondary_nodes:
3670 if (secondary in self.my_node_info
3671 and inst.name not in self.my_inst_info):
3672 inst.MapLVsByNode(node_vol_should)
3675 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3677 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3678 feedback_fn("* Verifying N+1 Memory redundancy")
3679 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3681 feedback_fn("* Other Notes")
3683 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3684 % len(i_non_redundant))
3686 if i_non_a_balanced:
3687 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3688 % len(i_non_a_balanced))
3691 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3694 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3697 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3701 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3702 """Analyze the post-hooks' result
3704 This method analyses the hook result, handles it, and sends some
3705 nicely-formatted feedback back to the user.
3707 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3708 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3709 @param hooks_results: the results of the multi-node hooks rpc call
3710 @param feedback_fn: function used send feedback back to the caller
3711 @param lu_result: previous Exec result
3712 @return: the new Exec result, based on the previous result
3716 # We only really run POST phase hooks, only for non-empty groups,
3717 # and are only interested in their results
3718 if not self.my_node_names:
3721 elif phase == constants.HOOKS_PHASE_POST:
3722 # Used to change hooks' output to proper indentation
3723 feedback_fn("* Hooks Results")
3724 assert hooks_results, "invalid result from hooks"
3726 for node_name in hooks_results:
3727 res = hooks_results[node_name]
3729 test = msg and not res.offline
3730 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3731 "Communication failure in hooks execution: %s", msg)
3732 if res.offline or msg:
3733 # No need to investigate payload if node is offline or gave
3736 for script, hkr, output in res.payload:
3737 test = hkr == constants.HKR_FAIL
3738 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3739 "Script %s failed, output:", script)
3741 output = self._HOOKS_INDENT_RE.sub(" ", output)
3742 feedback_fn("%s" % output)
3748 class LUClusterVerifyDisks(NoHooksLU):
3749 """Verifies the cluster disks status.
3754 def ExpandNames(self):
3755 self.share_locks = _ShareAll()
3756 self.needed_locks = {
3757 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3760 def Exec(self, feedback_fn):
3761 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3763 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3764 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3765 for group in group_names])
3768 class LUGroupVerifyDisks(NoHooksLU):
3769 """Verifies the status of all disks in a node group.
3774 def ExpandNames(self):
3775 # Raises errors.OpPrereqError on its own if group can't be found
3776 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3778 self.share_locks = _ShareAll()
3779 self.needed_locks = {
3780 locking.LEVEL_INSTANCE: [],
3781 locking.LEVEL_NODEGROUP: [],
3782 locking.LEVEL_NODE: [],
3784 # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
3785 # starts one instance of this opcode for every group, which means all
3786 # nodes will be locked for a short amount of time, so it's better to
3787 # acquire the node allocation lock as well.
3788 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3791 def DeclareLocks(self, level):
3792 if level == locking.LEVEL_INSTANCE:
3793 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3795 # Lock instances optimistically, needs verification once node and group
3796 # locks have been acquired
3797 self.needed_locks[locking.LEVEL_INSTANCE] = \
3798 self.cfg.GetNodeGroupInstances(self.group_uuid)
3800 elif level == locking.LEVEL_NODEGROUP:
3801 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3803 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3804 set([self.group_uuid] +
3805 # Lock all groups used by instances optimistically; this requires
3806 # going via the node before it's locked, requiring verification
3809 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3810 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3812 elif level == locking.LEVEL_NODE:
3813 # This will only lock the nodes in the group to be verified which contain
3815 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3816 self._LockInstancesNodes()
3818 # Lock all nodes in group to be verified
3819 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3820 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3821 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3823 def CheckPrereq(self):
3824 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3825 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3826 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3828 assert self.group_uuid in owned_groups
3830 # Check if locked instances are still correct
3831 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3833 # Get instance information
3834 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3836 # Check if node groups for locked instances are still correct
3837 _CheckInstancesNodeGroups(self.cfg, self.instances,
3838 owned_groups, owned_nodes, self.group_uuid)
3840 def Exec(self, feedback_fn):
3841 """Verify integrity of cluster disks.
3843 @rtype: tuple of three items
3844 @return: a tuple of (dict of node-to-node_error, list of instances
3845 which need activate-disks, dict of instance: (node, volume) for
3850 res_instances = set()
3853 nv_dict = _MapInstanceDisksToNodes(
3854 [inst for inst in self.instances.values()
3855 if inst.admin_state == constants.ADMINST_UP])
3858 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3859 set(self.cfg.GetVmCapableNodeList()))
3861 node_lvs = self.rpc.call_lv_list(nodes, [])
3863 for (node, node_res) in node_lvs.items():
3864 if node_res.offline:
3867 msg = node_res.fail_msg
3869 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3870 res_nodes[node] = msg
3873 for lv_name, (_, _, lv_online) in node_res.payload.items():
3874 inst = nv_dict.pop((node, lv_name), None)
3875 if not (lv_online or inst is None):
3876 res_instances.add(inst)
3878 # any leftover items in nv_dict are missing LVs, let's arrange the data
3880 for key, inst in nv_dict.iteritems():
3881 res_missing.setdefault(inst, []).append(list(key))
3883 return (res_nodes, list(res_instances), res_missing)
3886 class LUClusterRepairDiskSizes(NoHooksLU):
3887 """Verifies the cluster disks sizes.
3892 def ExpandNames(self):
3893 if self.op.instances:
3894 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3895 # Not getting the node allocation lock as only a specific set of
3896 # instances (and their nodes) is going to be acquired
3897 self.needed_locks = {
3898 locking.LEVEL_NODE_RES: [],
3899 locking.LEVEL_INSTANCE: self.wanted_names,
3901 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3903 self.wanted_names = None
3904 self.needed_locks = {
3905 locking.LEVEL_NODE_RES: locking.ALL_SET,
3906 locking.LEVEL_INSTANCE: locking.ALL_SET,
3908 # This opcode is acquires the node locks for all instances
3909 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3912 self.share_locks = {
3913 locking.LEVEL_NODE_RES: 1,
3914 locking.LEVEL_INSTANCE: 0,
3915 locking.LEVEL_NODE_ALLOC: 1,
3918 def DeclareLocks(self, level):
3919 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3920 self._LockInstancesNodes(primary_only=True, level=level)
3922 def CheckPrereq(self):
3923 """Check prerequisites.
3925 This only checks the optional instance list against the existing names.
3928 if self.wanted_names is None:
3929 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3931 self.wanted_instances = \
3932 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3934 def _EnsureChildSizes(self, disk):
3935 """Ensure children of the disk have the needed disk size.
3937 This is valid mainly for DRBD8 and fixes an issue where the
3938 children have smaller disk size.
3940 @param disk: an L{ganeti.objects.Disk} object
3943 if disk.dev_type == constants.LD_DRBD8:
3944 assert disk.children, "Empty children for DRBD8?"
3945 fchild = disk.children[0]
3946 mismatch = fchild.size < disk.size
3948 self.LogInfo("Child disk has size %d, parent %d, fixing",
3949 fchild.size, disk.size)
3950 fchild.size = disk.size
3952 # and we recurse on this child only, not on the metadev
3953 return self._EnsureChildSizes(fchild) or mismatch
3957 def Exec(self, feedback_fn):
3958 """Verify the size of cluster disks.
3961 # TODO: check child disks too
3962 # TODO: check differences in size between primary/secondary nodes
3964 for instance in self.wanted_instances:
3965 pnode = instance.primary_node
3966 if pnode not in per_node_disks:
3967 per_node_disks[pnode] = []
3968 for idx, disk in enumerate(instance.disks):
3969 per_node_disks[pnode].append((instance, idx, disk))
3971 assert not (frozenset(per_node_disks.keys()) -
3972 self.owned_locks(locking.LEVEL_NODE_RES)), \
3973 "Not owning correct locks"
3974 assert not self.owned_locks(locking.LEVEL_NODE)
3977 for node, dskl in per_node_disks.items():
3978 newl = [v[2].Copy() for v in dskl]
3980 self.cfg.SetDiskID(dsk, node)
3981 result = self.rpc.call_blockdev_getsize(node, newl)
3983 self.LogWarning("Failure in blockdev_getsize call to node"
3984 " %s, ignoring", node)
3986 if len(result.payload) != len(dskl):
3987 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3988 " result.payload=%s", node, len(dskl), result.payload)
3989 self.LogWarning("Invalid result from node %s, ignoring node results",
3992 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3994 self.LogWarning("Disk %d of instance %s did not return size"
3995 " information, ignoring", idx, instance.name)
3997 if not isinstance(size, (int, long)):
3998 self.LogWarning("Disk %d of instance %s did not return valid"
3999 " size information, ignoring", idx, instance.name)
4002 if size != disk.size:
4003 self.LogInfo("Disk %d of instance %s has mismatched size,"
4004 " correcting: recorded %d, actual %d", idx,
4005 instance.name, disk.size, size)
4007 self.cfg.Update(instance, feedback_fn)
4008 changed.append((instance.name, idx, size))
4009 if self._EnsureChildSizes(disk):
4010 self.cfg.Update(instance, feedback_fn)
4011 changed.append((instance.name, idx, disk.size))
4015 class LUClusterRename(LogicalUnit):
4016 """Rename the cluster.
4019 HPATH = "cluster-rename"
4020 HTYPE = constants.HTYPE_CLUSTER
4022 def BuildHooksEnv(self):
4027 "OP_TARGET": self.cfg.GetClusterName(),
4028 "NEW_NAME": self.op.name,
4031 def BuildHooksNodes(self):
4032 """Build hooks nodes.
4035 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4037 def CheckPrereq(self):
4038 """Verify that the passed name is a valid one.
4041 hostname = netutils.GetHostname(name=self.op.name,
4042 family=self.cfg.GetPrimaryIPFamily())
4044 new_name = hostname.name
4045 self.ip = new_ip = hostname.ip
4046 old_name = self.cfg.GetClusterName()
4047 old_ip = self.cfg.GetMasterIP()
4048 if new_name == old_name and new_ip == old_ip:
4049 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4050 " cluster has changed",
4052 if new_ip != old_ip:
4053 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4054 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4055 " reachable on the network" %
4056 new_ip, errors.ECODE_NOTUNIQUE)
4058 self.op.name = new_name
4060 def Exec(self, feedback_fn):
4061 """Rename the cluster.
4064 clustername = self.op.name
4067 # shutdown the master IP
4068 master_params = self.cfg.GetMasterNetworkParameters()
4069 ems = self.cfg.GetUseExternalMipScript()
4070 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4072 result.Raise("Could not disable the master role")
4075 cluster = self.cfg.GetClusterInfo()
4076 cluster.cluster_name = clustername
4077 cluster.master_ip = new_ip
4078 self.cfg.Update(cluster, feedback_fn)
4080 # update the known hosts file
4081 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4082 node_list = self.cfg.GetOnlineNodeList()
4084 node_list.remove(master_params.name)
4087 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4089 master_params.ip = new_ip
4090 result = self.rpc.call_node_activate_master_ip(master_params.name,
4092 msg = result.fail_msg
4094 self.LogWarning("Could not re-enable the master role on"
4095 " the master, please restart manually: %s", msg)
4100 def _ValidateNetmask(cfg, netmask):
4101 """Checks if a netmask is valid.
4103 @type cfg: L{config.ConfigWriter}
4104 @param cfg: The cluster configuration
4106 @param netmask: the netmask to be verified
4107 @raise errors.OpPrereqError: if the validation fails
4110 ip_family = cfg.GetPrimaryIPFamily()
4112 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4113 except errors.ProgrammerError:
4114 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4115 ip_family, errors.ECODE_INVAL)
4116 if not ipcls.ValidateNetmask(netmask):
4117 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4118 (netmask), errors.ECODE_INVAL)
4121 class LUClusterSetParams(LogicalUnit):
4122 """Change the parameters of the cluster.
4125 HPATH = "cluster-modify"
4126 HTYPE = constants.HTYPE_CLUSTER
4129 def CheckArguments(self):
4133 if self.op.uid_pool:
4134 uidpool.CheckUidPool(self.op.uid_pool)
4136 if self.op.add_uids:
4137 uidpool.CheckUidPool(self.op.add_uids)
4139 if self.op.remove_uids:
4140 uidpool.CheckUidPool(self.op.remove_uids)
4142 if self.op.master_netmask is not None:
4143 _ValidateNetmask(self.cfg, self.op.master_netmask)
4145 if self.op.diskparams:
4146 for dt_params in self.op.diskparams.values():
4147 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4149 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4150 except errors.OpPrereqError, err:
4151 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4154 def ExpandNames(self):
4155 # FIXME: in the future maybe other cluster params won't require checking on
4156 # all nodes to be modified.
4157 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4158 # resource locks the right thing, shouldn't it be the BGL instead?
4159 self.needed_locks = {
4160 locking.LEVEL_NODE: locking.ALL_SET,
4161 locking.LEVEL_INSTANCE: locking.ALL_SET,
4162 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4163 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4165 self.share_locks = _ShareAll()
4167 def BuildHooksEnv(self):
4172 "OP_TARGET": self.cfg.GetClusterName(),
4173 "NEW_VG_NAME": self.op.vg_name,
4176 def BuildHooksNodes(self):
4177 """Build hooks nodes.
4180 mn = self.cfg.GetMasterNode()
4183 def CheckPrereq(self):
4184 """Check prerequisites.
4186 This checks whether the given params don't conflict and
4187 if the given volume group is valid.
4190 if self.op.vg_name is not None and not self.op.vg_name:
4191 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4192 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4193 " instances exist", errors.ECODE_INVAL)
4195 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4196 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4197 raise errors.OpPrereqError("Cannot disable drbd helper while"
4198 " drbd-based instances exist",
4201 node_list = self.owned_locks(locking.LEVEL_NODE)
4203 # if vg_name not None, checks given volume group on all nodes
4205 vglist = self.rpc.call_vg_list(node_list)
4206 for node in node_list:
4207 msg = vglist[node].fail_msg
4209 # ignoring down node
4210 self.LogWarning("Error while gathering data on node %s"
4211 " (ignoring node): %s", node, msg)
4213 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4215 constants.MIN_VG_SIZE)
4217 raise errors.OpPrereqError("Error on node '%s': %s" %
4218 (node, vgstatus), errors.ECODE_ENVIRON)
4220 if self.op.drbd_helper:
4221 # checks given drbd helper on all nodes
4222 helpers = self.rpc.call_drbd_helper(node_list)
4223 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4225 self.LogInfo("Not checking drbd helper on offline node %s", node)
4227 msg = helpers[node].fail_msg
4229 raise errors.OpPrereqError("Error checking drbd helper on node"
4230 " '%s': %s" % (node, msg),
4231 errors.ECODE_ENVIRON)
4232 node_helper = helpers[node].payload
4233 if node_helper != self.op.drbd_helper:
4234 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4235 (node, node_helper), errors.ECODE_ENVIRON)
4237 self.cluster = cluster = self.cfg.GetClusterInfo()
4238 # validate params changes
4239 if self.op.beparams:
4240 objects.UpgradeBeParams(self.op.beparams)
4241 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4242 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4244 if self.op.ndparams:
4245 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4246 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4248 # TODO: we need a more general way to handle resetting
4249 # cluster-level parameters to default values
4250 if self.new_ndparams["oob_program"] == "":
4251 self.new_ndparams["oob_program"] = \
4252 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4254 if self.op.hv_state:
4255 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4256 self.cluster.hv_state_static)
4257 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4258 for hv, values in new_hv_state.items())
4260 if self.op.disk_state:
4261 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4262 self.cluster.disk_state_static)
4263 self.new_disk_state = \
4264 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4265 for name, values in svalues.items()))
4266 for storage, svalues in new_disk_state.items())
4269 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4272 all_instances = self.cfg.GetAllInstancesInfo().values()
4274 for group in self.cfg.GetAllNodeGroupsInfo().values():
4275 instances = frozenset([inst for inst in all_instances
4276 if compat.any(node in group.members
4277 for node in inst.all_nodes)])
4278 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4279 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4280 new = _ComputeNewInstanceViolations(ipol,
4281 new_ipolicy, instances)
4283 violations.update(new)
4286 self.LogWarning("After the ipolicy change the following instances"
4287 " violate them: %s",
4288 utils.CommaJoin(utils.NiceSort(violations)))
4290 if self.op.nicparams:
4291 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4292 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4293 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4296 # check all instances for consistency
4297 for instance in self.cfg.GetAllInstancesInfo().values():
4298 for nic_idx, nic in enumerate(instance.nics):
4299 params_copy = copy.deepcopy(nic.nicparams)
4300 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4302 # check parameter syntax
4304 objects.NIC.CheckParameterSyntax(params_filled)
4305 except errors.ConfigurationError, err:
4306 nic_errors.append("Instance %s, nic/%d: %s" %
4307 (instance.name, nic_idx, err))
4309 # if we're moving instances to routed, check that they have an ip
4310 target_mode = params_filled[constants.NIC_MODE]
4311 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4312 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4313 " address" % (instance.name, nic_idx))
4315 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4316 "\n".join(nic_errors), errors.ECODE_INVAL)
4318 # hypervisor list/parameters
4319 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4320 if self.op.hvparams:
4321 for hv_name, hv_dict in self.op.hvparams.items():
4322 if hv_name not in self.new_hvparams:
4323 self.new_hvparams[hv_name] = hv_dict
4325 self.new_hvparams[hv_name].update(hv_dict)
4327 # disk template parameters
4328 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4329 if self.op.diskparams:
4330 for dt_name, dt_params in self.op.diskparams.items():
4331 if dt_name not in self.op.diskparams:
4332 self.new_diskparams[dt_name] = dt_params
4334 self.new_diskparams[dt_name].update(dt_params)
4336 # os hypervisor parameters
4337 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4339 for os_name, hvs in self.op.os_hvp.items():
4340 if os_name not in self.new_os_hvp:
4341 self.new_os_hvp[os_name] = hvs
4343 for hv_name, hv_dict in hvs.items():
4345 # Delete if it exists
4346 self.new_os_hvp[os_name].pop(hv_name, None)
4347 elif hv_name not in self.new_os_hvp[os_name]:
4348 self.new_os_hvp[os_name][hv_name] = hv_dict
4350 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4353 self.new_osp = objects.FillDict(cluster.osparams, {})
4354 if self.op.osparams:
4355 for os_name, osp in self.op.osparams.items():
4356 if os_name not in self.new_osp:
4357 self.new_osp[os_name] = {}
4359 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4362 if not self.new_osp[os_name]:
4363 # we removed all parameters
4364 del self.new_osp[os_name]
4366 # check the parameter validity (remote check)
4367 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4368 os_name, self.new_osp[os_name])
4370 # changes to the hypervisor list
4371 if self.op.enabled_hypervisors is not None:
4372 self.hv_list = self.op.enabled_hypervisors
4373 for hv in self.hv_list:
4374 # if the hypervisor doesn't already exist in the cluster
4375 # hvparams, we initialize it to empty, and then (in both
4376 # cases) we make sure to fill the defaults, as we might not
4377 # have a complete defaults list if the hypervisor wasn't
4379 if hv not in new_hvp:
4381 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4382 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4384 self.hv_list = cluster.enabled_hypervisors
4386 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4387 # either the enabled list has changed, or the parameters have, validate
4388 for hv_name, hv_params in self.new_hvparams.items():
4389 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4390 (self.op.enabled_hypervisors and
4391 hv_name in self.op.enabled_hypervisors)):
4392 # either this is a new hypervisor, or its parameters have changed
4393 hv_class = hypervisor.GetHypervisorClass(hv_name)
4394 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4395 hv_class.CheckParameterSyntax(hv_params)
4396 _CheckHVParams(self, node_list, hv_name, hv_params)
4399 # no need to check any newly-enabled hypervisors, since the
4400 # defaults have already been checked in the above code-block
4401 for os_name, os_hvp in self.new_os_hvp.items():
4402 for hv_name, hv_params in os_hvp.items():
4403 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4404 # we need to fill in the new os_hvp on top of the actual hv_p
4405 cluster_defaults = self.new_hvparams.get(hv_name, {})
4406 new_osp = objects.FillDict(cluster_defaults, hv_params)
4407 hv_class = hypervisor.GetHypervisorClass(hv_name)
4408 hv_class.CheckParameterSyntax(new_osp)
4409 _CheckHVParams(self, node_list, hv_name, new_osp)
4411 if self.op.default_iallocator:
4412 alloc_script = utils.FindFile(self.op.default_iallocator,
4413 constants.IALLOCATOR_SEARCH_PATH,
4415 if alloc_script is None:
4416 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4417 " specified" % self.op.default_iallocator,
4420 def Exec(self, feedback_fn):
4421 """Change the parameters of the cluster.
4424 if self.op.vg_name is not None:
4425 new_volume = self.op.vg_name
4428 if new_volume != self.cfg.GetVGName():
4429 self.cfg.SetVGName(new_volume)
4431 feedback_fn("Cluster LVM configuration already in desired"
4432 " state, not changing")
4433 if self.op.drbd_helper is not None:
4434 new_helper = self.op.drbd_helper
4437 if new_helper != self.cfg.GetDRBDHelper():
4438 self.cfg.SetDRBDHelper(new_helper)
4440 feedback_fn("Cluster DRBD helper already in desired state,"
4442 if self.op.hvparams:
4443 self.cluster.hvparams = self.new_hvparams
4445 self.cluster.os_hvp = self.new_os_hvp
4446 if self.op.enabled_hypervisors is not None:
4447 self.cluster.hvparams = self.new_hvparams
4448 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4449 if self.op.beparams:
4450 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4451 if self.op.nicparams:
4452 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4454 self.cluster.ipolicy = self.new_ipolicy
4455 if self.op.osparams:
4456 self.cluster.osparams = self.new_osp
4457 if self.op.ndparams:
4458 self.cluster.ndparams = self.new_ndparams
4459 if self.op.diskparams:
4460 self.cluster.diskparams = self.new_diskparams
4461 if self.op.hv_state:
4462 self.cluster.hv_state_static = self.new_hv_state
4463 if self.op.disk_state:
4464 self.cluster.disk_state_static = self.new_disk_state
4466 if self.op.candidate_pool_size is not None:
4467 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4468 # we need to update the pool size here, otherwise the save will fail
4469 _AdjustCandidatePool(self, [])
4471 if self.op.maintain_node_health is not None:
4472 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4473 feedback_fn("Note: CONFD was disabled at build time, node health"
4474 " maintenance is not useful (still enabling it)")
4475 self.cluster.maintain_node_health = self.op.maintain_node_health
4477 if self.op.prealloc_wipe_disks is not None:
4478 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4480 if self.op.add_uids is not None:
4481 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4483 if self.op.remove_uids is not None:
4484 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4486 if self.op.uid_pool is not None:
4487 self.cluster.uid_pool = self.op.uid_pool
4489 if self.op.default_iallocator is not None:
4490 self.cluster.default_iallocator = self.op.default_iallocator
4492 if self.op.reserved_lvs is not None:
4493 self.cluster.reserved_lvs = self.op.reserved_lvs
4495 if self.op.use_external_mip_script is not None:
4496 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4498 def helper_os(aname, mods, desc):
4500 lst = getattr(self.cluster, aname)
4501 for key, val in mods:
4502 if key == constants.DDM_ADD:
4504 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4507 elif key == constants.DDM_REMOVE:
4511 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4513 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4515 if self.op.hidden_os:
4516 helper_os("hidden_os", self.op.hidden_os, "hidden")
4518 if self.op.blacklisted_os:
4519 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4521 if self.op.master_netdev:
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 ems = self.cfg.GetUseExternalMipScript()
4524 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4525 self.cluster.master_netdev)
4526 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4528 result.Raise("Could not disable the master ip")
4529 feedback_fn("Changing master_netdev from %s to %s" %
4530 (master_params.netdev, self.op.master_netdev))
4531 self.cluster.master_netdev = self.op.master_netdev
4533 if self.op.master_netmask:
4534 master_params = self.cfg.GetMasterNetworkParameters()
4535 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4536 result = self.rpc.call_node_change_master_netmask(master_params.name,
4537 master_params.netmask,
4538 self.op.master_netmask,
4540 master_params.netdev)
4542 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4545 self.cluster.master_netmask = self.op.master_netmask
4547 self.cfg.Update(self.cluster, feedback_fn)
4549 if self.op.master_netdev:
4550 master_params = self.cfg.GetMasterNetworkParameters()
4551 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4552 self.op.master_netdev)
4553 ems = self.cfg.GetUseExternalMipScript()
4554 result = self.rpc.call_node_activate_master_ip(master_params.name,
4557 self.LogWarning("Could not re-enable the master ip on"
4558 " the master, please restart manually: %s",
4562 def _UploadHelper(lu, nodes, fname):
4563 """Helper for uploading a file and showing warnings.
4566 if os.path.exists(fname):
4567 result = lu.rpc.call_upload_file(nodes, fname)
4568 for to_node, to_result in result.items():
4569 msg = to_result.fail_msg
4571 msg = ("Copy of file %s to node %s failed: %s" %
4572 (fname, to_node, msg))
4576 def _ComputeAncillaryFiles(cluster, redist):
4577 """Compute files external to Ganeti which need to be consistent.
4579 @type redist: boolean
4580 @param redist: Whether to include files which need to be redistributed
4583 # Compute files for all nodes
4585 pathutils.SSH_KNOWN_HOSTS_FILE,
4586 pathutils.CONFD_HMAC_KEY,
4587 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4588 pathutils.SPICE_CERT_FILE,
4589 pathutils.SPICE_CACERT_FILE,
4590 pathutils.RAPI_USERS_FILE,
4594 # we need to ship at least the RAPI certificate
4595 files_all.add(pathutils.RAPI_CERT_FILE)
4597 files_all.update(pathutils.ALL_CERT_FILES)
4598 files_all.update(ssconf.SimpleStore().GetFileList())
4600 if cluster.modify_etc_hosts:
4601 files_all.add(pathutils.ETC_HOSTS)
4603 if cluster.use_external_mip_script:
4604 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4606 # Files which are optional, these must:
4607 # - be present in one other category as well
4608 # - either exist or not exist on all nodes of that category (mc, vm all)
4610 pathutils.RAPI_USERS_FILE,
4613 # Files which should only be on master candidates
4617 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4621 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4622 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4623 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4625 # Files which should only be on VM-capable nodes
4628 for hv_name in cluster.enabled_hypervisors
4630 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4634 for hv_name in cluster.enabled_hypervisors
4636 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4638 # Filenames in each category must be unique
4639 all_files_set = files_all | files_mc | files_vm
4640 assert (len(all_files_set) ==
4641 sum(map(len, [files_all, files_mc, files_vm]))), \
4642 "Found file listed in more than one file list"
4644 # Optional files must be present in one other category
4645 assert all_files_set.issuperset(files_opt), \
4646 "Optional file not in a different required list"
4648 # This one file should never ever be re-distributed via RPC
4649 assert not (redist and
4650 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4652 return (files_all, files_opt, files_mc, files_vm)
4655 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4656 """Distribute additional files which are part of the cluster configuration.
4658 ConfigWriter takes care of distributing the config and ssconf files, but
4659 there are more files which should be distributed to all nodes. This function
4660 makes sure those are copied.
4662 @param lu: calling logical unit
4663 @param additional_nodes: list of nodes not in the config to distribute to
4664 @type additional_vm: boolean
4665 @param additional_vm: whether the additional nodes are vm-capable or not
4668 # Gather target nodes
4669 cluster = lu.cfg.GetClusterInfo()
4670 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4672 online_nodes = lu.cfg.GetOnlineNodeList()
4673 online_set = frozenset(online_nodes)
4674 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4676 if additional_nodes is not None:
4677 online_nodes.extend(additional_nodes)
4679 vm_nodes.extend(additional_nodes)
4681 # Never distribute to master node
4682 for nodelist in [online_nodes, vm_nodes]:
4683 if master_info.name in nodelist:
4684 nodelist.remove(master_info.name)
4687 (files_all, _, files_mc, files_vm) = \
4688 _ComputeAncillaryFiles(cluster, True)
4690 # Never re-distribute configuration file from here
4691 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4692 pathutils.CLUSTER_CONF_FILE in files_vm)
4693 assert not files_mc, "Master candidates not handled in this function"
4696 (online_nodes, files_all),
4697 (vm_nodes, files_vm),
4701 for (node_list, files) in filemap:
4703 _UploadHelper(lu, node_list, fname)
4706 class LUClusterRedistConf(NoHooksLU):
4707 """Force the redistribution of cluster configuration.
4709 This is a very simple LU.
4714 def ExpandNames(self):
4715 self.needed_locks = {
4716 locking.LEVEL_NODE: locking.ALL_SET,
4717 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4719 self.share_locks = _ShareAll()
4721 def Exec(self, feedback_fn):
4722 """Redistribute the configuration.
4725 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4726 _RedistributeAncillaryFiles(self)
4729 class LUClusterActivateMasterIp(NoHooksLU):
4730 """Activate the master IP on the master node.
4733 def Exec(self, feedback_fn):
4734 """Activate the master IP.
4737 master_params = self.cfg.GetMasterNetworkParameters()
4738 ems = self.cfg.GetUseExternalMipScript()
4739 result = self.rpc.call_node_activate_master_ip(master_params.name,
4741 result.Raise("Could not activate the master IP")
4744 class LUClusterDeactivateMasterIp(NoHooksLU):
4745 """Deactivate the master IP on the master node.
4748 def Exec(self, feedback_fn):
4749 """Deactivate the master IP.
4752 master_params = self.cfg.GetMasterNetworkParameters()
4753 ems = self.cfg.GetUseExternalMipScript()
4754 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4756 result.Raise("Could not deactivate the master IP")
4759 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4760 """Sleep and poll for an instance's disk to sync.
4763 if not instance.disks or disks is not None and not disks:
4766 disks = _ExpandCheckDisks(instance, disks)
4769 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4771 node = instance.primary_node
4774 lu.cfg.SetDiskID(dev, node)
4776 # TODO: Convert to utils.Retry
4779 degr_retries = 10 # in seconds, as we sleep 1 second each time
4783 cumul_degraded = False
4784 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4785 msg = rstats.fail_msg
4787 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4790 raise errors.RemoteError("Can't contact node %s for mirror data,"
4791 " aborting." % node)
4794 rstats = rstats.payload
4796 for i, mstat in enumerate(rstats):
4798 lu.LogWarning("Can't compute data for node %s/%s",
4799 node, disks[i].iv_name)
4802 cumul_degraded = (cumul_degraded or
4803 (mstat.is_degraded and mstat.sync_percent is None))
4804 if mstat.sync_percent is not None:
4806 if mstat.estimated_time is not None:
4807 rem_time = ("%s remaining (estimated)" %
4808 utils.FormatSeconds(mstat.estimated_time))
4809 max_time = mstat.estimated_time
4811 rem_time = "no time estimate"
4812 lu.LogInfo("- device %s: %5.2f%% done, %s",
4813 disks[i].iv_name, mstat.sync_percent, rem_time)
4815 # if we're done but degraded, let's do a few small retries, to
4816 # make sure we see a stable and not transient situation; therefore
4817 # we force restart of the loop
4818 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4819 logging.info("Degraded disks found, %d retries left", degr_retries)
4827 time.sleep(min(60, max_time))
4830 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4832 return not cumul_degraded
4835 def _BlockdevFind(lu, node, dev, instance):
4836 """Wrapper around call_blockdev_find to annotate diskparams.
4838 @param lu: A reference to the lu object
4839 @param node: The node to call out
4840 @param dev: The device to find
4841 @param instance: The instance object the device belongs to
4842 @returns The result of the rpc call
4845 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4846 return lu.rpc.call_blockdev_find(node, disk)
4849 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4850 """Wrapper around L{_CheckDiskConsistencyInner}.
4853 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4854 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4858 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4860 """Check that mirrors are not degraded.
4862 @attention: The device has to be annotated already.
4864 The ldisk parameter, if True, will change the test from the
4865 is_degraded attribute (which represents overall non-ok status for
4866 the device(s)) to the ldisk (representing the local storage status).
4869 lu.cfg.SetDiskID(dev, node)
4873 if on_primary or dev.AssembleOnSecondary():
4874 rstats = lu.rpc.call_blockdev_find(node, dev)
4875 msg = rstats.fail_msg
4877 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4879 elif not rstats.payload:
4880 lu.LogWarning("Can't find disk on node %s", node)
4884 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4886 result = result and not rstats.payload.is_degraded
4889 for child in dev.children:
4890 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4896 class LUOobCommand(NoHooksLU):
4897 """Logical unit for OOB handling.
4901 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4903 def ExpandNames(self):
4904 """Gather locks we need.
4907 if self.op.node_names:
4908 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4909 lock_names = self.op.node_names
4911 lock_names = locking.ALL_SET
4913 self.needed_locks = {
4914 locking.LEVEL_NODE: lock_names,
4917 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4919 if not self.op.node_names:
4920 # Acquire node allocation lock only if all nodes are affected
4921 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4923 def CheckPrereq(self):
4924 """Check prerequisites.
4927 - the node exists in the configuration
4930 Any errors are signaled by raising errors.OpPrereqError.
4934 self.master_node = self.cfg.GetMasterNode()
4936 assert self.op.power_delay >= 0.0
4938 if self.op.node_names:
4939 if (self.op.command in self._SKIP_MASTER and
4940 self.master_node in self.op.node_names):
4941 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4942 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4944 if master_oob_handler:
4945 additional_text = ("run '%s %s %s' if you want to operate on the"
4946 " master regardless") % (master_oob_handler,
4950 additional_text = "it does not support out-of-band operations"
4952 raise errors.OpPrereqError(("Operating on the master node %s is not"
4953 " allowed for %s; %s") %
4954 (self.master_node, self.op.command,
4955 additional_text), errors.ECODE_INVAL)
4957 self.op.node_names = self.cfg.GetNodeList()
4958 if self.op.command in self._SKIP_MASTER:
4959 self.op.node_names.remove(self.master_node)
4961 if self.op.command in self._SKIP_MASTER:
4962 assert self.master_node not in self.op.node_names
4964 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4966 raise errors.OpPrereqError("Node %s not found" % node_name,
4969 self.nodes.append(node)
4971 if (not self.op.ignore_status and
4972 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4973 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4974 " not marked offline") % node_name,
4977 def Exec(self, feedback_fn):
4978 """Execute OOB and return result if we expect any.
4981 master_node = self.master_node
4984 for idx, node in enumerate(utils.NiceSort(self.nodes,
4985 key=lambda node: node.name)):
4986 node_entry = [(constants.RS_NORMAL, node.name)]
4987 ret.append(node_entry)
4989 oob_program = _SupportsOob(self.cfg, node)
4992 node_entry.append((constants.RS_UNAVAIL, None))
4995 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4996 self.op.command, oob_program, node.name)
4997 result = self.rpc.call_run_oob(master_node, oob_program,
4998 self.op.command, node.name,
5002 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5003 node.name, result.fail_msg)
5004 node_entry.append((constants.RS_NODATA, None))
5007 self._CheckPayload(result)
5008 except errors.OpExecError, err:
5009 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5011 node_entry.append((constants.RS_NODATA, None))
5013 if self.op.command == constants.OOB_HEALTH:
5014 # For health we should log important events
5015 for item, status in result.payload:
5016 if status in [constants.OOB_STATUS_WARNING,
5017 constants.OOB_STATUS_CRITICAL]:
5018 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5019 item, node.name, status)
5021 if self.op.command == constants.OOB_POWER_ON:
5023 elif self.op.command == constants.OOB_POWER_OFF:
5024 node.powered = False
5025 elif self.op.command == constants.OOB_POWER_STATUS:
5026 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5027 if powered != node.powered:
5028 logging.warning(("Recorded power state (%s) of node '%s' does not"
5029 " match actual power state (%s)"), node.powered,
5032 # For configuration changing commands we should update the node
5033 if self.op.command in (constants.OOB_POWER_ON,
5034 constants.OOB_POWER_OFF):
5035 self.cfg.Update(node, feedback_fn)
5037 node_entry.append((constants.RS_NORMAL, result.payload))
5039 if (self.op.command == constants.OOB_POWER_ON and
5040 idx < len(self.nodes) - 1):
5041 time.sleep(self.op.power_delay)
5045 def _CheckPayload(self, result):
5046 """Checks if the payload is valid.
5048 @param result: RPC result
5049 @raises errors.OpExecError: If payload is not valid
5053 if self.op.command == constants.OOB_HEALTH:
5054 if not isinstance(result.payload, list):
5055 errs.append("command 'health' is expected to return a list but got %s" %
5056 type(result.payload))
5058 for item, status in result.payload:
5059 if status not in constants.OOB_STATUSES:
5060 errs.append("health item '%s' has invalid status '%s'" %
5063 if self.op.command == constants.OOB_POWER_STATUS:
5064 if not isinstance(result.payload, dict):
5065 errs.append("power-status is expected to return a dict but got %s" %
5066 type(result.payload))
5068 if self.op.command in [
5069 constants.OOB_POWER_ON,
5070 constants.OOB_POWER_OFF,
5071 constants.OOB_POWER_CYCLE,
5073 if result.payload is not None:
5074 errs.append("%s is expected to not return payload but got '%s'" %
5075 (self.op.command, result.payload))
5078 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5079 utils.CommaJoin(errs))
5082 class _OsQuery(_QueryBase):
5083 FIELDS = query.OS_FIELDS
5085 def ExpandNames(self, lu):
5086 # Lock all nodes in shared mode
5087 # Temporary removal of locks, should be reverted later
5088 # TODO: reintroduce locks when they are lighter-weight
5089 lu.needed_locks = {}
5090 #self.share_locks[locking.LEVEL_NODE] = 1
5091 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5093 # The following variables interact with _QueryBase._GetNames
5095 self.wanted = self.names
5097 self.wanted = locking.ALL_SET
5099 self.do_locking = self.use_locking
5101 def DeclareLocks(self, lu, level):
5105 def _DiagnoseByOS(rlist):
5106 """Remaps a per-node return list into an a per-os per-node dictionary
5108 @param rlist: a map with node names as keys and OS objects as values
5111 @return: a dictionary with osnames as keys and as value another
5112 map, with nodes as keys and tuples of (path, status, diagnose,
5113 variants, parameters, api_versions) as values, eg::
5115 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5116 (/srv/..., False, "invalid api")],
5117 "node2": [(/srv/..., True, "", [], [])]}
5122 # we build here the list of nodes that didn't fail the RPC (at RPC
5123 # level), so that nodes with a non-responding node daemon don't
5124 # make all OSes invalid
5125 good_nodes = [node_name for node_name in rlist
5126 if not rlist[node_name].fail_msg]
5127 for node_name, nr in rlist.items():
5128 if nr.fail_msg or not nr.payload:
5130 for (name, path, status, diagnose, variants,
5131 params, api_versions) in nr.payload:
5132 if name not in all_os:
5133 # build a list of nodes for this os containing empty lists
5134 # for each node in node_list
5136 for nname in good_nodes:
5137 all_os[name][nname] = []
5138 # convert params from [name, help] to (name, help)
5139 params = [tuple(v) for v in params]
5140 all_os[name][node_name].append((path, status, diagnose,
5141 variants, params, api_versions))
5144 def _GetQueryData(self, lu):
5145 """Computes the list of nodes and their attributes.
5148 # Locking is not used
5149 assert not (compat.any(lu.glm.is_owned(level)
5150 for level in locking.LEVELS
5151 if level != locking.LEVEL_CLUSTER) or
5152 self.do_locking or self.use_locking)
5154 valid_nodes = [node.name
5155 for node in lu.cfg.GetAllNodesInfo().values()
5156 if not node.offline and node.vm_capable]
5157 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5158 cluster = lu.cfg.GetClusterInfo()
5162 for (os_name, os_data) in pol.items():
5163 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5164 hidden=(os_name in cluster.hidden_os),
5165 blacklisted=(os_name in cluster.blacklisted_os))
5169 api_versions = set()
5171 for idx, osl in enumerate(os_data.values()):
5172 info.valid = bool(info.valid and osl and osl[0][1])
5176 (node_variants, node_params, node_api) = osl[0][3:6]
5179 variants.update(node_variants)
5180 parameters.update(node_params)
5181 api_versions.update(node_api)
5183 # Filter out inconsistent values
5184 variants.intersection_update(node_variants)
5185 parameters.intersection_update(node_params)
5186 api_versions.intersection_update(node_api)
5188 info.variants = list(variants)
5189 info.parameters = list(parameters)
5190 info.api_versions = list(api_versions)
5192 data[os_name] = info
5194 # Prepare data in requested order
5195 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5199 class LUOsDiagnose(NoHooksLU):
5200 """Logical unit for OS diagnose/query.
5206 def _BuildFilter(fields, names):
5207 """Builds a filter for querying OSes.
5210 name_filter = qlang.MakeSimpleFilter("name", names)
5212 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5213 # respective field is not requested
5214 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5215 for fname in ["hidden", "blacklisted"]
5216 if fname not in fields]
5217 if "valid" not in fields:
5218 status_filter.append([qlang.OP_TRUE, "valid"])
5221 status_filter.insert(0, qlang.OP_AND)
5223 status_filter = None
5225 if name_filter and status_filter:
5226 return [qlang.OP_AND, name_filter, status_filter]
5230 return status_filter
5232 def CheckArguments(self):
5233 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5234 self.op.output_fields, False)
5236 def ExpandNames(self):
5237 self.oq.ExpandNames(self)
5239 def Exec(self, feedback_fn):
5240 return self.oq.OldStyleQuery(self)
5243 class _ExtStorageQuery(_QueryBase):
5244 FIELDS = query.EXTSTORAGE_FIELDS
5246 def ExpandNames(self, lu):
5247 # Lock all nodes in shared mode
5248 # Temporary removal of locks, should be reverted later
5249 # TODO: reintroduce locks when they are lighter-weight
5250 lu.needed_locks = {}
5251 #self.share_locks[locking.LEVEL_NODE] = 1
5252 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5254 # The following variables interact with _QueryBase._GetNames
5256 self.wanted = self.names
5258 self.wanted = locking.ALL_SET
5260 self.do_locking = self.use_locking
5262 def DeclareLocks(self, lu, level):
5266 def _DiagnoseByProvider(rlist):
5267 """Remaps a per-node return list into an a per-provider per-node dictionary
5269 @param rlist: a map with node names as keys and ExtStorage objects as values
5272 @return: a dictionary with extstorage providers as keys and as
5273 value another map, with nodes as keys and tuples of
5274 (path, status, diagnose, parameters) as values, eg::
5276 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5277 "node2": [(/srv/..., False, "missing file")]
5278 "node3": [(/srv/..., True, "", [])]
5283 # we build here the list of nodes that didn't fail the RPC (at RPC
5284 # level), so that nodes with a non-responding node daemon don't
5285 # make all OSes invalid
5286 good_nodes = [node_name for node_name in rlist
5287 if not rlist[node_name].fail_msg]
5288 for node_name, nr in rlist.items():
5289 if nr.fail_msg or not nr.payload:
5291 for (name, path, status, diagnose, params) in nr.payload:
5292 if name not in all_es:
5293 # build a list of nodes for this os containing empty lists
5294 # for each node in node_list
5296 for nname in good_nodes:
5297 all_es[name][nname] = []
5298 # convert params from [name, help] to (name, help)
5299 params = [tuple(v) for v in params]
5300 all_es[name][node_name].append((path, status, diagnose, params))
5303 def _GetQueryData(self, lu):
5304 """Computes the list of nodes and their attributes.
5307 # Locking is not used
5308 assert not (compat.any(lu.glm.is_owned(level)
5309 for level in locking.LEVELS
5310 if level != locking.LEVEL_CLUSTER) or
5311 self.do_locking or self.use_locking)
5313 valid_nodes = [node.name
5314 for node in lu.cfg.GetAllNodesInfo().values()
5315 if not node.offline and node.vm_capable]
5316 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5320 nodegroup_list = lu.cfg.GetNodeGroupList()
5322 for (es_name, es_data) in pol.items():
5323 # For every provider compute the nodegroup validity.
5324 # To do this we need to check the validity of each node in es_data
5325 # and then construct the corresponding nodegroup dict:
5326 # { nodegroup1: status
5327 # nodegroup2: status
5330 for nodegroup in nodegroup_list:
5331 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5333 nodegroup_nodes = ndgrp.members
5334 nodegroup_name = ndgrp.name
5337 for node in nodegroup_nodes:
5338 if node in valid_nodes:
5339 if es_data[node] != []:
5340 node_status = es_data[node][0][1]
5341 node_statuses.append(node_status)
5343 node_statuses.append(False)
5345 if False in node_statuses:
5346 ndgrp_data[nodegroup_name] = False
5348 ndgrp_data[nodegroup_name] = True
5350 # Compute the provider's parameters
5352 for idx, esl in enumerate(es_data.values()):
5353 valid = bool(esl and esl[0][1])
5357 node_params = esl[0][3]
5360 parameters.update(node_params)
5362 # Filter out inconsistent values
5363 parameters.intersection_update(node_params)
5365 params = list(parameters)
5367 # Now fill all the info for this provider
5368 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5369 nodegroup_status=ndgrp_data,
5372 data[es_name] = info
5374 # Prepare data in requested order
5375 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5379 class LUExtStorageDiagnose(NoHooksLU):
5380 """Logical unit for ExtStorage diagnose/query.
5385 def CheckArguments(self):
5386 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5387 self.op.output_fields, False)
5389 def ExpandNames(self):
5390 self.eq.ExpandNames(self)
5392 def Exec(self, feedback_fn):
5393 return self.eq.OldStyleQuery(self)
5396 class LUNodeRemove(LogicalUnit):
5397 """Logical unit for removing a node.
5400 HPATH = "node-remove"
5401 HTYPE = constants.HTYPE_NODE
5403 def BuildHooksEnv(self):
5408 "OP_TARGET": self.op.node_name,
5409 "NODE_NAME": self.op.node_name,
5412 def BuildHooksNodes(self):
5413 """Build hooks nodes.
5415 This doesn't run on the target node in the pre phase as a failed
5416 node would then be impossible to remove.
5419 all_nodes = self.cfg.GetNodeList()
5421 all_nodes.remove(self.op.node_name)
5424 return (all_nodes, all_nodes)
5426 def CheckPrereq(self):
5427 """Check prerequisites.
5430 - the node exists in the configuration
5431 - it does not have primary or secondary instances
5432 - it's not the master
5434 Any errors are signaled by raising errors.OpPrereqError.
5437 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5438 node = self.cfg.GetNodeInfo(self.op.node_name)
5439 assert node is not None
5441 masternode = self.cfg.GetMasterNode()
5442 if node.name == masternode:
5443 raise errors.OpPrereqError("Node is the master node, failover to another"
5444 " node is required", errors.ECODE_INVAL)
5446 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5447 if node.name in instance.all_nodes:
5448 raise errors.OpPrereqError("Instance %s is still running on the node,"
5449 " please remove first" % instance_name,
5451 self.op.node_name = node.name
5454 def Exec(self, feedback_fn):
5455 """Removes the node from the cluster.
5459 logging.info("Stopping the node daemon and removing configs from node %s",
5462 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5464 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5467 # Promote nodes to master candidate as needed
5468 _AdjustCandidatePool(self, exceptions=[node.name])
5469 self.context.RemoveNode(node.name)
5471 # Run post hooks on the node before it's removed
5472 _RunPostHook(self, node.name)
5474 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5475 msg = result.fail_msg
5477 self.LogWarning("Errors encountered on the remote node while leaving"
5478 " the cluster: %s", msg)
5480 # Remove node from our /etc/hosts
5481 if self.cfg.GetClusterInfo().modify_etc_hosts:
5482 master_node = self.cfg.GetMasterNode()
5483 result = self.rpc.call_etc_hosts_modify(master_node,
5484 constants.ETC_HOSTS_REMOVE,
5486 result.Raise("Can't update hosts file with new host data")
5487 _RedistributeAncillaryFiles(self)
5490 class _NodeQuery(_QueryBase):
5491 FIELDS = query.NODE_FIELDS
5493 def ExpandNames(self, lu):
5494 lu.needed_locks = {}
5495 lu.share_locks = _ShareAll()
5498 self.wanted = _GetWantedNodes(lu, self.names)
5500 self.wanted = locking.ALL_SET
5502 self.do_locking = (self.use_locking and
5503 query.NQ_LIVE in self.requested_data)
5506 # If any non-static field is requested we need to lock the nodes
5507 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5508 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5510 def DeclareLocks(self, lu, level):
5513 def _GetQueryData(self, lu):
5514 """Computes the list of nodes and their attributes.
5517 all_info = lu.cfg.GetAllNodesInfo()
5519 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5521 # Gather data as requested
5522 if query.NQ_LIVE in self.requested_data:
5523 # filter out non-vm_capable nodes
5524 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5526 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5527 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5528 [lu.cfg.GetHypervisorType()], es_flags)
5529 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5530 for (name, nresult) in node_data.items()
5531 if not nresult.fail_msg and nresult.payload)
5535 if query.NQ_INST in self.requested_data:
5536 node_to_primary = dict([(name, set()) for name in nodenames])
5537 node_to_secondary = dict([(name, set()) for name in nodenames])
5539 inst_data = lu.cfg.GetAllInstancesInfo()
5541 for inst in inst_data.values():
5542 if inst.primary_node in node_to_primary:
5543 node_to_primary[inst.primary_node].add(inst.name)
5544 for secnode in inst.secondary_nodes:
5545 if secnode in node_to_secondary:
5546 node_to_secondary[secnode].add(inst.name)
5548 node_to_primary = None
5549 node_to_secondary = None
5551 if query.NQ_OOB in self.requested_data:
5552 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5553 for name, node in all_info.iteritems())
5557 if query.NQ_GROUP in self.requested_data:
5558 groups = lu.cfg.GetAllNodeGroupsInfo()
5562 return query.NodeQueryData([all_info[name] for name in nodenames],
5563 live_data, lu.cfg.GetMasterNode(),
5564 node_to_primary, node_to_secondary, groups,
5565 oob_support, lu.cfg.GetClusterInfo())
5568 class LUNodeQuery(NoHooksLU):
5569 """Logical unit for querying nodes.
5572 # pylint: disable=W0142
5575 def CheckArguments(self):
5576 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5577 self.op.output_fields, self.op.use_locking)
5579 def ExpandNames(self):
5580 self.nq.ExpandNames(self)
5582 def DeclareLocks(self, level):
5583 self.nq.DeclareLocks(self, level)
5585 def Exec(self, feedback_fn):
5586 return self.nq.OldStyleQuery(self)
5589 class LUNodeQueryvols(NoHooksLU):
5590 """Logical unit for getting volumes on node(s).
5594 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5595 _FIELDS_STATIC = utils.FieldSet("node")
5597 def CheckArguments(self):
5598 _CheckOutputFields(static=self._FIELDS_STATIC,
5599 dynamic=self._FIELDS_DYNAMIC,
5600 selected=self.op.output_fields)
5602 def ExpandNames(self):
5603 self.share_locks = _ShareAll()
    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
5615 def Exec(self, feedback_fn):
5616 """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
5620 volumes = self.rpc.call_node_volumes(nodenames)
5622 ilist = self.cfg.GetAllInstancesInfo()
5623 vol2inst = _MapInstanceDisksToNodes(ilist.values())
    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))
        output.append(node_output)

    return output
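# Example (sketch): with output_fields = ["node", "name", "size"], each
# entry appended above is a list of strings such as
# ["node1.example.com", "lv0", "1024"] (values illustrative); the
# stringification via str(val) keeps the legacy query output uniform.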
5662 class LUNodeQueryStorage(NoHooksLU):
5663 """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False
5669 def CheckArguments(self):
5670 _CheckOutputFields(static=self._FIELDS_STATIC,
5671 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5672 selected=self.op.output_fields)
5674 def ExpandNames(self):
5675 self.share_locks = _ShareAll()
    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
5687 def Exec(self, feedback_fn):
5688 """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
5693 # Always get name to sort by
5694 if constants.SF_NAME in self.op.output_fields:
5695 fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
5699 # Never ask for node or type as it's only known to the LU
5700 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5701 while extra in fields:
5702 fields.remove(extra)
5704 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5705 name_idx = field_idx[constants.SF_NAME]
5707 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5708 data = self.rpc.call_storage_list(self.nodes,
5709 self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []
    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])
      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(str(val))
        result.append(out)

    return result
5748 class _InstanceQuery(_QueryBase):
5749 FIELDS = query.INSTANCE_FIELDS
5751 def ExpandNames(self, lu):
5752 lu.needed_locks = {}
5753 lu.share_locks = _ShareAll()
    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
5760 self.do_locking = (self.use_locking and
5761 query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5768 self.do_grouplocks = (self.do_locking and
5769 query.IQ_NODES in self.requested_data)
5771 def DeclareLocks(self, lu, level):
5773 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5774 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5776 # Lock all groups used by instances optimistically; this requires going
5777 # via the node before it's locked, requiring verification later on
      lu.needed_locks[locking.LEVEL_NODEGROUP] = \
        frozenset(group_uuid
                  for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
                  for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5782 elif level == locking.LEVEL_NODE:
5783 lu._LockInstancesNodes() # pylint: disable=W0212
  @staticmethod
  def _CheckGroupLocks(lu):
5787 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5788 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5790 # Check if node groups for locked instances are still correct
5791 for instance_name in owned_instances:
5792 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5794 def _GetQueryData(self, lu):
5795 """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
5799 self._CheckGroupLocks(lu)
5801 cluster = lu.cfg.GetClusterInfo()
5802 all_info = lu.cfg.GetAllInstancesInfo()
5804 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5806 instance_list = [all_info[name] for name in instance_names]
5807 nodes = frozenset(itertools.chain(*(inst.all_nodes
5808 for inst in instance_list)))
5809 hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
5814 # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = None
    if query.IQ_DISKUSAGE in self.requested_data:
      gmi = ganeti.masterd.instance
      disk_usage = dict((inst.name,
                         gmi.ComputeDiskSize(inst.disk_template,
                                             [{constants.IDISK_SIZE: disk.size}
                                              for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None
    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None
    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5881 class LUQuery(NoHooksLU):
5882 """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False
5888 def CheckArguments(self):
5889 qcls = _GetQueryImplementation(self.op.what)
5891 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5893 def ExpandNames(self):
5894 self.impl.ExpandNames(self)
5896 def DeclareLocks(self, level):
5897 self.impl.DeclareLocks(self, level)
5899 def Exec(self, feedback_fn):
5900 return self.impl.NewStyleQuery(self)
5903 class LUQueryFields(NoHooksLU):
5904 """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False
5910 def CheckArguments(self):
5911 self.qcls = _GetQueryImplementation(self.op.what)
5913 def ExpandNames(self):
5914 self.needed_locks = {}
5916 def Exec(self, feedback_fn):
5917 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
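# Example (sketch): an OpQueryFields with what="node" and fields=None is
# assumed to describe every field in query.NODE_FIELDS, while
# fields=["name"] narrows the description to that single field.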
5920 class LUNodeModifyStorage(NoHooksLU):
5921 """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
5927 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5929 storage_type = self.op.storage_type
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' cannot be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields cannot be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
5945 def ExpandNames(self):
5946 self.needed_locks = {
5947 locking.LEVEL_NODE: self.op.node_name,
5950 def Exec(self, feedback_fn):
5951 """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5955 result = self.rpc.call_storage_modify(self.op.node_name,
5956 self.op.storage_type, st_args,
5957 self.op.name, self.op.changes)
5958 result.Raise("Failed to modify storage unit '%s' on %s" %
5959 (self.op.name, self.op.node_name))
5962 class LUNodeAdd(LogicalUnit):
5963 """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
5968 _NFLAGS = ["master_capable", "vm_capable"]
5970 def CheckArguments(self):
5971 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5972 # validate/normalize the node name
5973 self.hostname = netutils.GetHostname(name=self.op.node_name,
5974 family=self.primary_ip_family)
5975 self.op.node_name = self.hostname.name
5977 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5978 raise errors.OpPrereqError("Cannot readd the master node",
5981 if self.op.readd and self.op.group:
5982 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5983 " being readded", errors.ECODE_INVAL)
5985 def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
5993 "NODE_NAME": self.op.node_name,
5994 "NODE_PIP": self.op.primary_ip,
5995 "NODE_SIP": self.op.secondary_ip,
5996 "MASTER_CAPABLE": str(self.op.master_capable),
5997 "VM_CAPABLE": str(self.op.vm_capable),
6000 def BuildHooksNodes(self):
6001 """Build hooks nodes.
6004 # Exclude added node
6005 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6006 post_nodes = pre_nodes + [self.op.node_name, ]
6008 return (pre_nodes, post_nodes)
6010 def CheckPrereq(self):
6011 """Check prerequisites.
6014 - the new node is not already in the config
     - its parameters (single/dual homed) match the cluster
6018 Any errors are signaled by raising errors.OpPrereqError.
    cfg = self.cfg
    hostname = self.hostname
6023 node = hostname.name
6024 primary_ip = self.op.primary_ip = hostname.ip
6025 if self.op.secondary_ip is None:
6026 if self.primary_ip_family == netutils.IP6Address.family:
6027 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6028 " IPv4 address must be given as secondary",
6030 self.op.secondary_ip = primary_ip
6032 secondary_ip = self.op.secondary_ip
6033 if not netutils.IP4Address.IsValid(secondary_ip):
6034 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6035 " address" % secondary_ip, errors.ECODE_INVAL)
6037 node_list = cfg.GetNodeList()
6038 if not self.op.readd and node in node_list:
6039 raise errors.OpPrereqError("Node %s is already in the configuration" %
6040 node, errors.ECODE_EXISTS)
6041 elif self.op.readd and node not in node_list:
6042 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6045 self.changed_primary_ip = False
6047 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6048 if self.op.readd and node == existing_node_name:
6049 if existing_node.secondary_ip != secondary_ip:
6050 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6051 " address configuration as before",
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue
6058 if (existing_node.primary_ip == primary_ip or
6059 existing_node.secondary_ip == primary_ip or
6060 existing_node.primary_ip == secondary_ip or
6061 existing_node.secondary_ip == secondary_ip):
6062 raise errors.OpPrereqError("New node ip address(es) conflict with"
6063 " existing node %s" % existing_node.name,
6064 errors.ECODE_NOTUNIQUE)
6066 # After this 'if' block, None is no longer a valid value for the
6067 # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
6070 assert old_node is not None, "Can't retrieve locked node %s" % node
6071 for attr in self._NFLAGS:
6072 if getattr(self.op, attr) is None:
6073 setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
6076 if getattr(self.op, attr) is None:
6077 setattr(self.op, attr, True)
6079 if self.op.readd and not self.op.vm_capable:
6080 pri, sec = cfg.GetNodeInstances(node)
6082 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6083 " flag set to false, but it already holds"
6084 " instances" % node,
6087 # check that the type of the node (single versus dual homed) is the
6088 # same as for the master
6089 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6090 master_singlehomed = myself.secondary_ip == myself.primary_ip
6091 newbie_singlehomed = secondary_ip == primary_ip
6092 if master_singlehomed != newbie_singlehomed:
6093 if master_singlehomed:
6094 raise errors.OpPrereqError("The master has no secondary ip but the"
6095 " new node has one",
6098 raise errors.OpPrereqError("The master has a secondary ip but the"
6099 " new node doesn't have one",
6102 # checks reachability
6103 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6104 raise errors.OpPrereqError("Node not reachable by ping",
6105 errors.ECODE_ENVIRON)
6107 if not newbie_singlehomed:
6108 # check reachability from my secondary ip to newbie's secondary ip
6109 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6110 source=myself.secondary_ip):
6111 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6112 " based ping to node daemon port",
6113 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
6121 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False
    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
6130 primary_ip=primary_ip,
6131 secondary_ip=secondary_ip,
6132 master_candidate=self.master_candidate,
6133 offline=False, drained=False,
6134 group=node_group, ndparams={})
6136 if self.op.ndparams:
6137 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6139 if self.op.hv_state:
6140 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6142 if self.op.disk_state:
6143 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6145 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6146 # it a property on the base class.
6147 rpcrunner = rpc.DnsOnlyRunner()
6148 result = rpcrunner.call_version([node])[node]
6149 result.Raise("Can't get version information from node %s" % node)
6150 if constants.PROTOCOL_VERSION == result.payload:
6151 logging.info("Communication to node %s fine, sw version %s match",
6152 node, result.payload)
6154 raise errors.OpPrereqError("Version mismatch master version %s,"
6155 " node version %s" %
6156 (constants.PROTOCOL_VERSION, result.payload),
6157 errors.ECODE_ENVIRON)
6159 vg_name = cfg.GetVGName()
6160 if vg_name is not None:
6161 vparams = {constants.NV_PVLIST: [vg_name]}
6162 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6163 if self.op.ndparams:
        excl_stor = self.op.ndparams.get(constants.ND_EXCLUSIVE_STORAGE,
                                         excl_stor)
6166 cname = self.cfg.GetClusterName()
6167 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6168 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6170 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6171 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6173 def Exec(self, feedback_fn):
6174 """Adds the new node to the cluster.
6177 new_node = self.new_node
6178 node = new_node.name
6180 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
    # We are adding a new node, so we assume it's powered
6184 new_node.powered = True
6186 # for re-adds, reset the offline/drained/master-candidate flags;
6187 # we need to reset here, otherwise offline would prevent RPC calls
6188 # later in the procedure; this also means that if the re-add
6189 # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip
6198 # copy the master/vm_capable flags
6199 for attr in self._NFLAGS:
6200 setattr(new_node, attr, getattr(self.op, attr))
6202 # notify the user about any possible mc promotion
6203 if new_node.master_candidate:
6204 self.LogInfo("Node will be a master candidate")
6206 if self.op.ndparams:
6207 new_node.ndparams = self.op.ndparams
6209 new_node.ndparams = {}
6211 if self.op.hv_state:
6212 new_node.hv_state_static = self.new_hv_state
6214 if self.op.disk_state:
6215 new_node.disk_state_static = self.new_disk_state
6217 # Add node to our /etc/hosts, and add key to known_hosts
6218 if self.cfg.GetClusterInfo().modify_etc_hosts:
6219 master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")
6226 if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)
6230 node_verify_list = [self.cfg.GetMasterNode()]
6231 node_verify_param = {
6232 constants.NV_NODELIST: ([node], {}),
6233 # TODO: do a node-net-test as well?
6236 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6237 self.cfg.GetClusterName())
6238 for verifier in node_verify_list:
6239 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6240 nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")
    if self.op.readd:
      _RedistributeAncillaryFiles(self)
6250 self.context.ReaddNode(new_node)
6251 # make sure we redistribute the config
6252 self.cfg.Update(new_node, feedback_fn)
6253 # and make sure the new node will not have old files around
6254 if not new_node.master_candidate:
6255 result = self.rpc.call_node_demote_from_mc(new_node.name)
6256 msg = result.fail_msg
6258 self.LogWarning("Node failed to demote itself from master"
6259 " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
6266 class LUNodeSetParams(LogicalUnit):
6267 """Modifies the parameters of a node.
6269 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6270 to the node role (as _ROLE_*)
6271 @cvar _R2F: a dictionary from node role to tuples of flags
6272 @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
6276 HTYPE = constants.HTYPE_NODE
6278 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6280 (True, False, False): _ROLE_CANDIDATE,
6281 (False, True, False): _ROLE_DRAINED,
6282 (False, False, True): _ROLE_OFFLINE,
6283 (False, False, False): _ROLE_REGULAR,
6285 _R2F = dict((v, k) for k, v in _F2R.items())
6286 _FLAGS = ["master_candidate", "drained", "offline"]
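  # Example (sketch): the two role tables are inverses of each other, so a
  # node's (master_candidate, drained, offline) flag tuple maps to exactly
  # one role and back:
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  #   _R2F[_ROLE_OFFLINE] == (False, False, True)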
6288 def CheckArguments(self):
6289 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6290 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6291 self.op.master_capable, self.op.vm_capable,
6292 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6294 if all_mods.count(None) == len(all_mods):
6295 raise errors.OpPrereqError("Please pass at least one modification",
6297 if all_mods.count(True) > 1:
6298 raise errors.OpPrereqError("Can't set the node into more than one"
6299 " state at the same time",
6302 # Boolean value that tells us whether we might be demoting from MC
6303 self.might_demote = (self.op.master_candidate is False or
6304 self.op.offline is True or
6305 self.op.drained is True or
6306 self.op.master_capable is False)
6308 if self.op.secondary_ip:
6309 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6310 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6311 " address" % self.op.secondary_ip,
6314 self.lock_all = self.op.auto_promote and self.might_demote
6315 self.lock_instances = self.op.secondary_ip is not None
6317 def _InstanceFilter(self, instance):
6318 """Filter for getting affected instances.
6321 return (instance.disk_template in constants.DTS_INT_MIRROR and
6322 self.op.node_name in instance.all_nodes)
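  # Example (sketch): this predicate is meant to be passed to
  # cfg.GetInstancesInfoByFilter, as done in ExpandNames/CheckPrereq below:
  #   affected = self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
  # yielding only internally-mirrored (e.g. DRBD) instances that touch the
  # node being modified.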
6324 def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {
6327 locking.LEVEL_NODE: locking.ALL_SET,
6329 # Block allocations when all nodes are locked
6330 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: self.op.node_name,
        }
6337 # Since modifying a node can have severe effects on currently running
6338 # operations the resource lock is at least acquired in shared mode
6339 self.needed_locks[locking.LEVEL_NODE_RES] = \
6340 self.needed_locks[locking.LEVEL_NODE]
6342 # Get all locks except nodes in shared mode; they are not used for anything
6343 # but read-only access
6344 self.share_locks = _ShareAll()
6345 self.share_locks[locking.LEVEL_NODE] = 0
6346 self.share_locks[locking.LEVEL_NODE_RES] = 0
6347 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6349 if self.lock_instances:
6350 self.needed_locks[locking.LEVEL_INSTANCE] = \
6351 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6353 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
6361 "MASTER_CANDIDATE": str(self.op.master_candidate),
6362 "OFFLINE": str(self.op.offline),
6363 "DRAINED": str(self.op.drained),
6364 "MASTER_CAPABLE": str(self.op.master_capable),
6365 "VM_CAPABLE": str(self.op.vm_capable),
6368 def BuildHooksNodes(self):
6369 """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)
6375 def CheckPrereq(self):
6376 """Check prerequisites.

    This only checks the instance list against the existing names.

    """
6381 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6383 if self.lock_instances:
6384 affected_instances = \
6385 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6387 # Verify instance locks
6388 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6389 wanted_instances = frozenset(affected_instances.keys())
6390 if wanted_instances - owned_instances:
6391 raise errors.OpPrereqError("Instances affected by changing node %s's"
6392 " secondary IP address have changed since"
6393 " locks were acquired, wanted '%s', have"
6394 " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None
6402 if (self.op.master_candidate is not None or
6403 self.op.drained is not None or
6404 self.op.offline is not None):
6405 # we can't change the master's node flags
6406 if self.op.node_name == self.cfg.GetMasterNode():
6407 raise errors.OpPrereqError("The master role can be changed"
6408 " only via master-failover",
6411 if self.op.master_candidate and not node.master_capable:
6412 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6413 " it a master candidate" % node.name,
6416 if self.op.vm_capable is False:
6417 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6419 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6420 " the vm_capable flag" % node.name,
6423 if node.master_candidate and self.might_demote and not self.lock_all:
6424 assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
6427 (mc_remaining, mc_should, _) = \
6428 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6429 if mc_remaining < mc_should:
6430 raise errors.OpPrereqError("Not enough master candidates, please"
6431 " pass auto promote option to allow"
6432 " promotion (--auto-promote or RAPI"
6433 " auto_promote=True)", errors.ECODE_STATE)
6435 self.old_flags = old_flags = (node.master_candidate,
6436 node.drained, node.offline)
6437 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6438 self.old_role = old_role = self._F2R[old_flags]
6440 # Check for ineffective changes
6441 for attr in self._FLAGS:
6442 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6443 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6444 setattr(self.op, attr, None)
6446 # Past this point, any flag change to False means a transition
6447 # away from the respective state, as only real changes are kept
6449 # TODO: We might query the real power state if it supports OOB
6450 if _SupportsOob(self.cfg, node):
6451 if self.op.offline is False and not (node.powered or
6452 self.op.powered is True):
6453 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6454 " offline status can be reset") %
6455 self.op.node_name, errors.ECODE_STATE)
6456 elif self.op.powered is not None:
6457 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6458 " as it does not support out-of-band"
6459 " handling") % self.op.node_name,
6462 # If we're being deofflined/drained, we'll MC ourself if needed
6463 if (self.op.drained is False or self.op.offline is False or
6464 (self.op.master_capable and not node.master_capable)):
6465 if _DecideSelfPromotion(self):
6466 self.op.master_candidate = True
6467 self.LogInfo("Auto-promoting node to master candidate")
6469 # If we're no longer master capable, we'll demote ourselves from MC
6470 if self.op.master_capable is False and node.master_candidate:
6471 self.LogInfo("Demoting from master candidate")
6472 self.op.master_candidate = False
6475 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6476 if self.op.master_candidate:
6477 new_role = self._ROLE_CANDIDATE
6478 elif self.op.drained:
6479 new_role = self._ROLE_DRAINED
6480 elif self.op.offline:
6481 new_role = self._ROLE_OFFLINE
6482 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # current) flags
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role
6491 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6492 # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")
    # When changing the secondary IP, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant checks.
6507 if self.op.secondary_ip:
6508 # Ok even without locking, because this can't be changed by any LU
6509 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6510 master_singlehomed = master.secondary_ip == master.primary_ip
6511 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6512 if self.op.force and node.name == master.name:
6513 self.LogWarning("Transitioning from single-homed to multi-homed"
6514 " cluster; all nodes will require a secondary IP"
6517 raise errors.OpPrereqError("Changing the secondary ip on a"
6518 " single-homed cluster requires the"
6519 " --force option to be passed, and the"
6520 " target node to be the master",
6522 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6523 if self.op.force and node.name == master.name:
6524 self.LogWarning("Transitioning from multi-homed to single-homed"
6525 " cluster; secondary IP addresses will have to be"
6528 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6529 " same as the primary IP on a multi-homed"
6530 " cluster, unless the --force option is"
6531 " passed, and the target node is the"
6532 " master", errors.ECODE_INVAL)
6534 assert not (frozenset(affected_instances) -
6535 self.owned_locks(locking.LEVEL_INSTANCE))
      if node.offline:
        if affected_instances:
6539 msg = ("Cannot change secondary IP address: offline node has"
6540 " instances (%s) configured to use it" %
6541 utils.CommaJoin(affected_instances.keys()))
6542 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
6546 for instance in affected_instances.values():
6547 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6548 msg="cannot change secondary ip")
6550 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6551 if master.name != node.name:
6552 # check reachability from master secondary ip to new secondary ip
6553 if not netutils.TcpPing(self.op.secondary_ip,
6554 constants.DEFAULT_NODED_PORT,
6555 source=master.secondary_ip):
6556 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6557 " based ping to node daemon port",
6558 errors.ECODE_ENVIRON)
6560 if self.op.ndparams:
6561 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6562 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6563 self.new_ndparams = new_ndparams
6565 if self.op.hv_state:
6566 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6567 self.node.hv_state_static)
6569 if self.op.disk_state:
6570 self.new_disk_state = \
6571 _MergeAndVerifyDiskState(self.op.disk_state,
6572 self.node.disk_state_static)
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []
6584 if self.op.ndparams:
6585 node.ndparams = self.new_ndparams
6587 if self.op.powered is not None:
6588 node.powered = self.op.powered
6590 if self.op.hv_state:
6591 node.hv_state_static = self.new_hv_state
6593 if self.op.disk_state:
6594 node.disk_state_static = self.new_disk_state
6596 for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))
6602 if new_role != old_role:
6603 # Tell the node to demote itself, if no longer MC and not offline
6604 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6605 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6607 self.LogWarning("Node failed to demote itself: %s", msg)
6609 new_flags = self._R2F[new_role]
6610 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
6613 (node.master_candidate, node.drained, node.offline) = new_flags
6615 # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])
6619 if self.op.secondary_ip:
6620 node.secondary_ip = self.op.secondary_ip
6621 result.append(("secondary_ip", self.op.secondary_ip))
6623 # this will trigger configuration file update, if needed
6624 self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
6628 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6634 class LUNodePowercycle(NoHooksLU):
6635 """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
6641 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6642 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6643 raise errors.OpPrereqError("The node is the master and the force"
6644 " parameter was not set",
6647 def ExpandNames(self):
6648 """Locking for PowercycleNode.
6650 This is a last-resort option and shouldn't block on other
6651 jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}
6656 def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
6661 self.cfg.GetHypervisorType())
6662 result.Raise("Failed to schedule the reboot")
6663 return result.payload
6666 class LUClusterQuery(NoHooksLU):
6667 """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
6673 self.needed_locks = {}
6675 def Exec(self, feedback_fn):
6676 """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}
    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
6684 os_hvp[os_name] = {}
6685 for hv_name, hv_params in hv_dict.items():
6686 if hv_name in cluster.enabled_hypervisors:
6687 os_hvp[os_name][hv_name] = hv_params
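    # Example (sketch): with os_hvp = {"debian": {"kvm": {...},
    # "xen-pvm": {...}}} and enabled_hypervisors = ["kvm"], the filtered
    # result keeps only {"debian": {"kvm": {...}}} (OS and hypervisor
    # names are illustrative).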
6689 # Convert ip_family to ip_version
6690 primary_ip_version = constants.IP4_VERSION
6691 if cluster.primary_ip_family == netutils.IP6Address.family:
6692 primary_ip_version = constants.IP6_VERSION
6695 "software_version": constants.RELEASE_VERSION,
6696 "protocol_version": constants.PROTOCOL_VERSION,
6697 "config_version": constants.CONFIG_VERSION,
6698 "os_api_version": max(constants.OS_API_VERSIONS),
6699 "export_version": constants.EXPORT_VERSION,
6700 "architecture": runtime.GetArchInfo(),
6701 "name": cluster.cluster_name,
6702 "master": cluster.master_node,
6703 "default_hypervisor": cluster.primary_hypervisor,
6704 "enabled_hypervisors": cluster.enabled_hypervisors,
6705 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6706 for hypervisor_name in cluster.enabled_hypervisors]),
6708 "beparams": cluster.beparams,
6709 "osparams": cluster.osparams,
6710 "ipolicy": cluster.ipolicy,
6711 "nicparams": cluster.nicparams,
6712 "ndparams": cluster.ndparams,
6713 "diskparams": cluster.diskparams,
6714 "candidate_pool_size": cluster.candidate_pool_size,
6715 "master_netdev": cluster.master_netdev,
6716 "master_netmask": cluster.master_netmask,
6717 "use_external_mip_script": cluster.use_external_mip_script,
6718 "volume_group_name": cluster.volume_group_name,
6719 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6720 "file_storage_dir": cluster.file_storage_dir,
6721 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6722 "maintain_node_health": cluster.maintain_node_health,
6723 "ctime": cluster.ctime,
6724 "mtime": cluster.mtime,
6725 "uuid": cluster.uuid,
6726 "tags": list(cluster.GetTags()),
6727 "uid_pool": cluster.uid_pool,
6728 "default_iallocator": cluster.default_iallocator,
6729 "reserved_lvs": cluster.reserved_lvs,
6730 "primary_ip_version": primary_ip_version,
6731 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6732 "hidden_os": cluster.hidden_os,
6733 "blacklisted_os": cluster.blacklisted_os,
6739 class LUClusterConfigQuery(NoHooksLU):
6740 """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
6746 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6748 def ExpandNames(self):
6749 self.cq.ExpandNames(self)
6751 def DeclareLocks(self, level):
6752 self.cq.DeclareLocks(self, level)
6754 def Exec(self, feedback_fn):
6755 result = self.cq.OldStyleQuery(self)
    assert len(result) == 1

    return result[0]
6762 class _ClusterQuery(_QueryBase):
6763 FIELDS = query.CLUSTER_FIELDS
  #: Do not sort (there is only one item)
  SORT_FIELD = None
6768 def ExpandNames(self, lu):
6769 lu.needed_locks = {}
6771 # The following variables interact with _QueryBase._GetNames
6772 self.wanted = locking.ALL_SET
6773 self.do_locking = self.use_locking
6776 raise errors.OpPrereqError("Can not use locking for cluster queries",
  def DeclareLocks(self, lu, level):
    pass
6782 def _GetQueryData(self, lu):
6783 """Computes the list of nodes and their attributes.

    """
    # Locking is not used
6787 assert not (compat.any(lu.glm.is_owned(level)
6788 for level in locking.LEVELS
6789 if level != locking.LEVEL_CLUSTER) or
6790 self.do_locking or self.use_locking)
6792 if query.CQ_CONFIG in self.requested_data:
6793 cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented
6797 if query.CQ_QUEUE_DRAINED in self.requested_data:
6798 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented
6802 if query.CQ_WATCHER_PAUSE in self.requested_data:
6803 master_name = lu.cfg.GetMasterNode()
6805 result = lu.rpc.call_get_watcher_pause(master_name)
6806 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6809 watcher_pause = result.payload
    else:
      watcher_pause = NotImplemented
6813 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
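  # Note: fields that were not requested are passed to ClusterQueryData as
  # NotImplemented; the query layer is then expected to skip them instead
  # of serializing placeholder values.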
6816 class LUInstanceActivateDisks(NoHooksLU):
6817 """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
6823 self._ExpandAndLockInstance()
6824 self.needed_locks[locking.LEVEL_NODE] = []
6825 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6827 def DeclareLocks(self, level):
6828 if level == locking.LEVEL_NODE:
6829 self._LockInstancesNodes()
6831 def CheckPrereq(self):
6832 """Check prerequisites.

    This checks that the instance is in the cluster.

    """
6837 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6838 assert self.instance is not None, \
6839 "Cannot retrieve locked instance %s" % self.op.instance_name
6840 _CheckNodeOnline(self, self.instance.primary_node)
6842 def Exec(self, feedback_fn):
6843 """Activate the disks.

    """
    disks_ok, disks_info = \
6847 _AssembleInstanceDisks(self, self.instance,
6848 ignore_size=self.op.ignore_size)
6850 raise errors.OpExecError("Cannot activate block devices")
6852 if self.op.wait_for_sync:
6853 if not _WaitForSync(self, self.instance):
6854 raise errors.OpExecError("Some disks of the instance are degraded!")
6859 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6861 """Prepare the block devices for an instance.
6863 This sets up the block devices on all nodes.
6865 @type lu: L{LogicalUnit}
6866 @param lu: the logical unit on whose behalf we execute
6867 @type instance: L{objects.Instance}
6868 @param instance: the instance for whose disks we assemble
6869 @type disks: list of L{objects.Disk} or None
6870 @param disks: which disks to assemble (or all, if None)
6871 @type ignore_secondaries: boolean
6872 @param ignore_secondaries: if true, errors on secondary nodes
6873 won't result in an error return from the function
6874 @type ignore_size: boolean
6875 @param ignore_size: if true, the current known size of the disk
6876 will not be used during the disk activation, useful for cases
6877 when the size is wrong
6878 @return: False if the operation failed, otherwise a list of
6879 (host, instance_visible_name, node_visible_name)
6880 with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
6886 disks = _ExpandCheckDisks(instance, disks)
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it
6892 # The proper fix would be to wait (with some limits) until the
6893 # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)
6897 # 1st pass, assemble on all nodes in secondary mode
6898 for idx, inst_disk in enumerate(disks):
6899 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=False, pass=1): %s",
                      inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False
6916 # FIXME: race condition on drbd migration to primary
6918 # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

6922 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue

6926 node_disk = node_disk.Copy()
6927 node_disk.UnsetSize()
6928 lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=True, pass=2): %s",
                      inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload
6940 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6942 # leave the disks configured for the primary node
6943 # this is a workaround that would be fixed better by
6944 # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)
6948 return disks_ok, device_info
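# Example (sketch): callers are expected to check the boolean first, as
# LUInstanceActivateDisks.Exec above does:
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")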
6951 def _StartInstanceDisks(lu, instance, force):
6952 """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6956 ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.LogWarning("",
                    hint=("If the message above refers to a secondary node,"
                          " you can retry the operation using '--force'"))
6963 raise errors.OpExecError("Disk consistency error")
6966 class LUInstanceDeactivateDisks(NoHooksLU):
6967 """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
6973 self._ExpandAndLockInstance()
6974 self.needed_locks[locking.LEVEL_NODE] = []
6975 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6977 def DeclareLocks(self, level):
6978 if level == locking.LEVEL_NODE:
6979 self._LockInstancesNodes()
6981 def CheckPrereq(self):
6982 """Check prerequisites.

    This checks that the instance is in the cluster.

    """
6987 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6988 assert self.instance is not None, \
6989 "Cannot retrieve locked instance %s" % self.op.instance_name
6991 def Exec(self, feedback_fn):
6992 """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
7002 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7003 """Shutdown block devices of an instance.
7005 This function checks if an instance is running, before calling
7006 _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7010 _ShutdownInstanceDisks(lu, instance, disks=disks)
7013 def _ExpandCheckDisks(instance, disks):
7014 """Return the instance disks selected by the disks list
7016 @type disks: list of L{objects.Disk} or None
7017 @param disks: selected disks
7018 @rtype: list of L{objects.Disk}
7019 @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks

  if not set(disks).issubset(instance.disks):
    raise errors.ProgrammerError("Can only act on disks belonging to the"
                                 " target instance")
  return disks
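# Example (sketch): passing disks=None makes helpers such as
# _ShutdownInstanceDisks act on all of instance.disks, while an explicit
# subset, e.g. instance.disks[:1], restricts them to that one disk.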
7031 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7032 """Shutdown block devices of an instance.
7034 This does the shutdown on all nodes of the instance.
  If ignore_primary is false, errors on the primary node are reported
  back as a failed shutdown (the function returns False).

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result
7057 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7058 """Checks if a node has enough free memory.
7060 This function checks if a given node has the needed amount of free
7061 memory. In case the node has less memory or we cannot get the
7062 information from the node, this function raises an OpPrereqError
7065 @type lu: C{LogicalUnit}
7066 @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
7069 @type reason: C{str}
7070 @param reason: string to use in the error message
7071 @type requested: C{int}
7072 @param requested: the amount of memory in MiB to check for
7073 @type hypervisor_name: C{str}
7074 @param hypervisor_name: the hypervisor to ask for memory stats
7076 @return: node current free memory
7077 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7078 we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7082 nodeinfo[node].Raise("Can't get data from node %s" % node,
7083 prereq=True, ecode=errors.ECODE_ENVIRON)
7084 (_, _, (hv_info, )) = nodeinfo[node].payload
7086 free_mem = hv_info.get("memory_free", None)
7087 if not isinstance(free_mem, int):
7088 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7089 " was '%s'" % (node, free_mem),
7090 errors.ECODE_ENVIRON)
7091 if requested > free_mem:
7092 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7093 " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)

  return free_mem
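# Example (sketch): a typical caller checks the target node right before
# starting an instance, as LUInstanceStartup.CheckPrereq below does:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)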
7099 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7100 """Checks if nodes have enough free disk space in all the VGs.
7102 This function checks if all given nodes have the needed amount of
7103 free disk. In case any node has less disk or we cannot get the
7104 information from the node, this function raises an OpPrereqError
7107 @type lu: C{LogicalUnit}
7108 @param lu: a logical unit from which we get configuration data
7109 @type nodenames: C{list}
7110 @param nodenames: the list of node names to check
7111 @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
7114 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7115 or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
7119 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
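# Example (sketch): req_sizes maps VG names to the total MiB required on
# each node, e.g. {"xenvg": 10240} (VG name illustrative), so the check
# above simply fans out one _CheckNodesFreeDiskOnVG call per VG.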
7122 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7123 """Checks if nodes have enough free disk space in the specified VG.
7125 This function checks if all given nodes have the needed amount of
7126 free disk. In case any node has less disk or we cannot get the
7127 information from the node, this function raises an OpPrereqError
7130 @type lu: C{LogicalUnit}
7131 @param lu: a logical unit from which we get configuration data
7132 @type nodenames: C{list}
7133 @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
7136 @type requested: C{int}
7137 @param requested: the amount of disk in MiB to check for
7138 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7139 or we cannot check the node

  """
  es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7143 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7144 for node in nodenames:
7145 info = nodeinfo[node]
7146 info.Raise("Cannot get current information from node %s" % node,
7147 prereq=True, ecode=errors.ECODE_ENVIRON)
7148 (_, (vg_info, ), _) = info.payload
7149 vg_free = vg_info.get("vg_free", None)
7150 if not isinstance(vg_free, int):
7151 raise errors.OpPrereqError("Can't compute free disk space on node"
7152 " %s for vg %s, result was '%s'" %
7153 (node, vg, vg_free), errors.ECODE_ENVIRON)
7154 if requested > vg_free:
7155 raise errors.OpPrereqError("Not enough disk space on target node %s"
7156 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
7161 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7162 """Checks if nodes have enough physical CPUs
7164 This function checks if all given nodes have the needed number of
7165 physical CPUs. In case any node has less CPUs or we cannot get the
7166 information from the node, this function raises an OpPrereqError
7169 @type lu: C{LogicalUnit}
7170 @param lu: a logical unit from which we get configuration data
7171 @type nodenames: C{list}
7172 @param nodenames: the list of node names to check
7173 @type requested: C{int}
7174 @param requested: the minimum acceptable number of physical CPUs
7175 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7176 or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7180 for node in nodenames:
7181 info = nodeinfo[node]
7182 info.Raise("Cannot get current information from node %s" % node,
7183 prereq=True, ecode=errors.ECODE_ENVIRON)
7184 (_, _, (hv_info, )) = info.payload
7185 num_cpus = hv_info.get("cpu_total", None)
7186 if not isinstance(num_cpus, int):
7187 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7188 " on node %s, result was '%s'" %
7189 (node, num_cpus), errors.ECODE_ENVIRON)
7190 if requested > num_cpus:
7191 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7192 "required" % (node, num_cpus, requested),
7196 class LUInstanceStartup(LogicalUnit):
7197 """Starts an instance.
7200 HPATH = "instance-start"
7201 HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
7206 if self.op.beparams:
7207 # fill the beparams dict
7208 objects.UpgradeBeParams(self.op.beparams)
7209 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
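    # Note (sketch): objects.UpgradeBeParams converts legacy-style backend
    # parameters in place; e.g. an old single "memory" value is assumed to
    # be turned into the maxmem/minmem pair used by current opcodes.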
7211 def ExpandNames(self):
7212 self._ExpandAndLockInstance()
7213 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7215 def DeclareLocks(self, level):
7216 if level == locking.LEVEL_NODE_RES:
7217 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7219 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
7233 def BuildHooksNodes(self):
7234 """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
7240 def CheckPrereq(self):
7241 """Check prerequisites.

    This checks that the instance is in the cluster.

    """
7246 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7247 assert self.instance is not None, \
7248 "Cannot retrieve locked instance %s" % self.op.instance_name
7251 if self.op.hvparams:
7252 # check hypervisor parameter syntax (locally)
7253 cluster = self.cfg.GetClusterInfo()
7254 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7255 filled_hvp = cluster.FillHV(instance)
7256 filled_hvp.update(self.op.hvparams)
7257 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7258 hv_type.CheckParameterSyntax(filled_hvp)
7259 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7261 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7263 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7265 if self.primary_offline and self.op.ignore_offline_nodes:
7266 self.LogWarning("Ignoring offline primary node")
7268 if self.op.hvparams or self.op.beparams:
7269 self.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)
7273 bep = self.cfg.GetClusterInfo().FillBE(instance)
7274 bep.update(self.op.beparams)
7276 # check bridges existence
7277 _CheckInstanceBridgesExist(self, instance)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
7282 remote_info.Raise("Error checking node %s" % instance.primary_node,
7283 prereq=True, ecode=errors.ECODE_ENVIRON)
7284 if not remote_info.payload: # not running already
7285 _CheckNodeFreeMemory(self, instance.primary_node,
7286 "starting instance %s" % instance.name,
7287 bep[constants.BE_MINMEM], instance.hypervisor)
7289 def Exec(self, feedback_fn):
7290 """Start the instance.

    """
    instance = self.instance
7294 force = self.op.force
7296 if not self.op.no_remember:
7297 self.cfg.MarkInstanceUp(instance.name)
7299 if self.primary_offline:
7300 assert self.op.ignore_offline_nodes
7301 self.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node
7305 _StartInstanceDisks(self, instance, force)
      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
7318 class LUInstanceReboot(LogicalUnit):
7319 """Reboot an instance.
7322 HPATH = "instance-reboot"
7323 HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
7327 self._ExpandAndLockInstance()
7329 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7337 "REBOOT_TYPE": self.op.reboot_type,
7338 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
7345 def BuildHooksNodes(self):
7346 """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
7352 def CheckPrereq(self):
7353 """Check prerequisites.

    This checks that the instance is in the cluster.

    """
7358 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7359 assert self.instance is not None, \
7360 "Cannot retrieve locked instance %s" % self.op.instance_name
7361 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7362 _CheckNodeOnline(self, instance.primary_node)
7364 # check bridges existence
7365 _CheckInstanceBridgesExist(self, instance)
7367 def Exec(self, feedback_fn):
7368 """Reboot the instance.

    """
    instance = self.instance
7372 ignore_secondaries = self.op.ignore_secondaries
7373 reboot_type = self.op.reboot_type
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
7378 remote_info.Raise("Error checking node %s" % instance.primary_node)
7379 instance_running = bool(remote_info.payload)
7381 node_current = instance.primary_node
7383 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7384 constants.INSTANCE_REBOOT_HARD]:
7385 for disk in instance.disks:
7386 self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
7390 result.Raise("Could not reboot instance")
    else:
      if instance_running:
7393 result = self.rpc.call_instance_shutdown(node_current, instance,
7394 self.op.shutdown_timeout)
7395 result.Raise("Could not shutdown instance for full reboot")
7396 _ShutdownInstanceDisks(self, instance)
7398 self.LogInfo("Instance %s was already stopped, starting now",
7400 _StartInstanceDisks(self, instance, ignore_secondaries)
7401 result = self.rpc.call_instance_start(node_current,
7402 (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)
7409 self.cfg.MarkInstanceUp(instance.name)
7412 class LUInstanceShutdown(LogicalUnit):
7413 """Shutdown an instance.
7416 HPATH = "instance-stop"
7417 HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
7421 self._ExpandAndLockInstance()
7423 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
7429 env = _BuildInstanceHookEnvByObject(self, self.instance)
7430 env["TIMEOUT"] = self.op.timeout
7433 def BuildHooksNodes(self):
7434 """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
7440 def CheckPrereq(self):
7441 """Check prerequisites.

    This checks that the instance is in the cluster.

    """
7446 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7447 assert self.instance is not None, \
7448 "Cannot retrieve locked instance %s" % self.op.instance_name
7450 if not self.op.force:
7451 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7453 self.LogWarning("Ignoring offline instance check")
7455 self.primary_offline = \
7456 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7458 if self.primary_offline and self.op.ignore_offline_nodes:
7459 self.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)
7463 def Exec(self, feedback_fn):
7464 """Shutdown the instance.

    """
    instance = self.instance
7468 node_current = instance.primary_node
7469 timeout = self.op.timeout
7471 # If the instance is offline we shouldn't mark it as down, as that
7472 # resets the offline flag.
7473 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7474 self.cfg.MarkInstanceDown(instance.name)
7476 if self.primary_offline:
7477 assert self.op.ignore_offline_nodes
7478 self.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7481 msg = result.fail_msg
7483 self.LogWarning("Could not shutdown instance: %s", msg)
7485 _ShutdownInstanceDisks(self, instance)
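# Editor's note: MarkInstanceDown above only records the new administrative
# state in the cluster configuration; the actual power-off is the
# call_instance_shutdown RPC. The admin_state check preserves the "offline"
# flag, which a plain shutdown must not silently reset to "down".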


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = compat.UniqueFrozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    constants.IDISK_PROVIDER,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                               for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor,
                                        node_whitelist=None)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
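  # Editor's sketch (assumed serialization): the request built above reaches
  # the external iallocator script as a JSON document roughly of the form
  #
  #   {"name": "inst1.example.com", "disk_template": "drbd", "tags": [],
  #    "os": "debian-image", "nics": [{}], "vcpus": 1, "memory": 128,
  #    "spindle_use": 1, "disks": [{"size": 1024, "mode": "rw"}],
  #    "hypervisor": "kvm"}
  #
  # Keys mirror the IAReqInstanceAlloc keyword arguments; the exact wire
  # format is defined in ganeti.masterd.iallocator, not in this module.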

  def CheckArguments(self):
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifiable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)

        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    _CreateDisks(self, instance, to_skip=to_skip)
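# Editor's note on the DRBD8 logical_id manipulated above: it is the 6-tuple
#
#   (node_a, node_b, port, minor_a, minor_b, shared_secret)
#
# so recreating disks on new nodes keeps the TCP port and shared secret but
# allocates fresh minors for the new node pair via cfg.AllocateDRBDMinor.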


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = _CheckHostnameSane(self, new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # update info on disks
    info = _GetInstanceInfoText(inst)
    for (idx, disk) in enumerate(inst.disks):
      for node in inst.all_nodes:
        self.cfg.SetDiskID(disk, node)
        result = self.rpc.call_blockdev_setinfo(node, disk, info)
        if result.fail_msg:
          self.LogWarning("Error setting info on node %s for disk %s: %s",
                          node, idx, result.fail_msg)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
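# Editor's note: the LU does not drop the instance lock itself; it records
# the name in lu.remove_locks and (as far as this module relies on it) the
# processor releases and deletes the lock once the LU has finished, so no
# other job can grab the name mid-removal.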


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


def _ExpandNamesForMigration(lu):
  """Expands names for use with L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}

  """
  if lu.op.target_node is not None:
    lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)

  lu.needed_locks[locking.LEVEL_NODE] = []
  lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  lu.needed_locks[locking.LEVEL_NODE_RES] = []
  lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for replicated instances
  # (e.g. DRBD8) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []


def _DeclareLocksForMigration(lu, level):
  """Declares locks for L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_ALLOC:
    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)

    instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)

    # Node locks are already declared here rather than at LEVEL_NODE as we need
    # the instance object anyway to declare the node allocation lock.
    if instance.disk_template in constants.DTS_EXT_MIRROR:
      if lu.op.target_node is None:
        lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
      else:
        lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                               lu.op.target_node]
      del lu.recalculate_locks[locking.LEVEL_NODE]
    else:
      lu._LockInstancesNodes() # pylint: disable=W0212

  elif level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] or
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)

  elif level == locking.LEVEL_NODE_RES:
    # Copy node locks
    lu.needed_locks[locking.LEVEL_NODE_RES] = \
      _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
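# Editor's note: DeclareLocks is called once per lock level in a fixed order
# (instance, node group, node allocation, node, node resources). Node locks
# for externally mirrored templates are therefore computed early, at
# LEVEL_NODE_ALLOC, where the instance object can already be read safely
# under the instance lock asserted above.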


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, False, True, False,
                        self.op.ignore_consistency, True,
                        self.op.shutdown_timeout, self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
                        False, self.op.allow_failover, False,
                        self.op.allow_runtime_changes,
                        constants.DEFAULT_SHUTDOWN_TIMEOUT,
                        self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    snodes = list(instance.secondary_nodes)
    nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
    return (nl, nl)


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    cluster = self.cfg.GetClusterInfo()
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.LogWarning("Could not shutdown instance %s on node %s."
                        " Proceeding anyway. Please make sure node"
                        " %s is down. Error details: %s",
                        instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
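# Editor's sketch: LUNodeMigrate emits one single-opcode job per primary
# instance, so evacuating a node with two instances returns roughly
#
#   ResultWithJobs([[OpInstanceMigrate(instance_name="inst1", ...)],
#                   [OpInstanceMigrate(instance_name="inst2", ...)]])
#
# where each inner list becomes a separate job and the job IDs are handed
# back to the caller.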


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
  @type ignore_ipolicy: bool
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup, failover, fallback,
               ignore_consistency, allow_runtime_changes, shutdown_timeout,
               ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])
        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the secondary node
    if (not self.cleanup and
        (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
                                               "migrating instance %s" %
                                               instance.name,
                                               i_be[constants.BE_MINMEM],
                                               instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

    # FIXME: add a self.ignore_ipolicy option
    req = iallocator.IAReqRelocate(name=self.instance_name,
                                   relocate_from=[self.instance.primary_node])
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            (self.instance.disks,
                                             self.instance))
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)
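  # Editor's note: the helpers above implement the DRBD reconfiguration used
  # during migration: close the device on one node (_EnsureSecondary),
  # disconnect the network on both (_GoStandalone), reattach either in
  # dual-master mode for the transfer or single-master mode afterwards
  # (_GoReconnect), and wait for resync (_WaitUntilSync) between steps.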

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(
      source_node, instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor], False)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' or `ext' and there was a
    # successful migration, unmap the device from the source node.
    if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
                                     False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()
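# Editor's sketch of the tasklet's three code paths, as selected in Exec
# above (argument order follows __init__; keyword form here is only for
# readability and is hypothetical):
#
#   TLMigrateInstance(lu, name, cleanup=False, failover=True, ...)  # failover
#   TLMigrateInstance(lu, name, cleanup=True, failover=False, ...)  # cleanup
#   TLMigrateInstance(lu, name, cleanup=False, failover=False, ...) # migrate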


def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Wrapper around L{_CreateBlockDevInner}.

  This method annotates the root device first.

  """
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
                              force_open, excl_stor)
def _CreateBlockDevInner(lu, node, instance, device, force_create,
                         info, force_open, excl_stor):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active for the node

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDevInner(lu, node, instance, child, force_create,
                           info, force_open, excl_stor)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
                        excl_stor)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
                          excl_stor):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active for the node

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info,
                                       excl_stor)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


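# Illustrative example (not part of the original source): each extension is
# appended to a freshly generated unique ID, so a call like
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
# returns something along the lines of
#   ["d1f5e982-....disk0_data", "0e3a1c42-....disk0_meta"]
# where the exact IDs come from lu.cfg.GenerateUniqueID.

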
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
                          params={})
  dev_meta = objects.Disk(dev_type=constants.LD_LV,
                          size=constants.DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
                          params={})
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params={})
  return drbd_dev


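# The device tree assembled above has this shape (sketch only; the port,
# minors and shared secret are allocated from the cluster pools at runtime):
#
#   LD_DRBD8, size
#     +- LD_LV "<name>_data", size             on vgnames[0]
#     +- LD_LV "<name>_meta", DRBD_META_SIZE   on vgnames[1]

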
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  constants.DT_EXT: ".ext",
  }


_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  constants.DT_EXT: constants.LD_EXT,
  }


def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  """
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
                                                       full_disk_params)
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    if template_name == constants.DT_PLAIN:

      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])

    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    elif template_name == constants.DT_EXT:
      def logical_id_fn(idx, _, disk):
        provider = disk.get(constants.IDISK_PROVIDER, None)
        if provider is None:
          raise errors.ProgrammerError("Disk template is %s, but '%s' is"
                                       " not found", constants.DT_EXT,
                                       constants.IDISK_PROVIDER)
        return (provider, names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" %
                                   template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      params = {}
      # Only for the Ext template add disk_info to params
      if template_name == constants.DT_EXT:
        params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
        for key in disk:
          if key not in constants.IDISK_PARAMS:
            params[key] = disk[key]
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index,
                                                         disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=params))

  return disks


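# Illustrative call (parameters invented for the example): two plain LVM
# disks for a new instance, starting at index 0, could be generated with
#   _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                         "node1.example.com", [],
#                         [{constants.IDISK_SIZE: 1024,
#                           constants.IDISK_MODE: constants.DISK_RDWR}] * 2,
#                         None, None, 0, feedback_fn, {})
# yielding a list of two LD_LV objects.Disk entries in the default VG.

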
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


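# Worked example (illustrative): if 256 MiB out of 1024 MiB were written in
# 30 seconds, the average time per MiB is 30 / 256.0 seconds and
#   _CalcEta(30.0, 256, 1024) == (1024 - 256) * (30 / 256.0) == 90.0
# i.e. 90 seconds remain.

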
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  if disks is None:
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
               instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
                                                   instance),
                                                  True)
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("Pausing synchronization of disk %s of instance '%s'"
                   " failed", idx, instance.name)

  try:
    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
      wipe_chunk_size = \
        int(min(constants.MAX_WIPE_CHUNK,
                device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))

      size = device.size
      last_output = 0
      start_time = time.time()

      if offset == 0:
        info_text = ""
      else:
        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node,
                   wipe_chunk_size)

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))
          last_output = now
  finally:
    logging.info("Resuming synchronization of disks for instance '%s'",
                 instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),
                                                     instance),
                                                    False)

    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resuming synchronization of disk %s of instance"
                        " '%s' failed", idx, instance.name)


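# Chunk size example (illustrative, assuming the usual constant values of
# MAX_WIPE_CHUNK = 1024 MiB and MIN_WIPE_CHUNK_PERCENT = 10): a 20480 MiB
# disk would use min(1024, 20480 / 100.0 * 10) == 1024 MiB chunks, with
# progress logged at most once per minute.

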
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)

  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks

  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


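# Example (illustrative): two 1024 MiB disks in volume group "xenvg" under
# DT_DRBD8 require 2 * (1024 + constants.DRBD_META_SIZE) MiB, so with the
# 128 MiB metadata size mentioned above the result is
#   {"xenvg": 2304}

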
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The full filled beparams
  @param node_whitelist: List of nodes which should appear as online to the
    allocator (unless the node is already marked offline)

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       tags=op.tags,
                                       os=op.os_type,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       disks=disks,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor,
                                       node_whitelist=node_whitelist)


def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Computes the nics.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param ec_id: Execution context ID

  @returns: The built-up nics

  """
  nics = []
  for nic in op.nics:
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    net = nic.get(constants.INIC_NETWORK, None)
    link = nic.get(constants.NIC_LINK, None)
    ip = nic.get(constants.INIC_IP, None)

    if net is None or net.lower() == constants.VALUE_NONE:
      net = None
    else:
      if nic_mode_req is not None or link is not None:
        raise errors.OpPrereqError("If network is given, no mode or link"
                                   " is allowed to be passed",
                                   errors.ECODE_INVAL)

    # ip validity checks
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s)
      if ip.lower() == constants.NIC_IP_POOL:
        if net is None:
          raise errors.OpPrereqError("if ip=pool, parameter network"
                                     " must be passed too",
                                     errors.ECODE_INVAL)

      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link

    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    nics.append(objects.NIC(mac=mac, ip=nic_ip,
                            network=net, nicparams=nicparams))

  return nics


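# Example (illustrative): op.nics = [{constants.INIC_MAC:
# constants.VALUE_AUTO}] yields a single objects.NIC whose mac is still
# "auto" (the real address is generated later, in CheckPrereq), whose ip is
# None and whose nicparams stay empty, so the cluster defaults apply when
# the parameters are filled.

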
def _ComputeDisks(op, default_vg):
  """Computes the instance disks.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume

  @return: The computed disks

  """
  disks = []
  for disk in op.disks:
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 mode, errors.ECODE_INVAL)
    size = disk.get(constants.IDISK_SIZE, None)
    if size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                 errors.ECODE_INVAL)

    ext_provider = disk.get(constants.IDISK_PROVIDER, None)
    if ext_provider and op.disk_template != constants.DT_EXT:
      raise errors.OpPrereqError("The '%s' option is only valid for the %s"
                                 " disk template, not %s" %
                                 (constants.IDISK_PROVIDER, constants.DT_EXT,
                                  op.disk_template), errors.ECODE_INVAL)

    data_vg = disk.get(constants.IDISK_VG, default_vg)
    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: data_vg,
      }

    if constants.IDISK_METAVG in disk:
      new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
    if constants.IDISK_ADOPT in disk:
      new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]

    # For extstorage, demand the `provider' option and add any
    # additional parameters (ext-params) to the dict
    if op.disk_template == constants.DT_EXT:
      if ext_provider:
        new_disk[constants.IDISK_PROVIDER] = ext_provider
        for key in disk:
          if key not in constants.IDISK_PARAMS:
            new_disk[key] = disk[key]
      else:
        raise errors.OpPrereqError("Missing provider for template '%s'" %
                                   constants.DT_EXT, errors.ECODE_INVAL)

    disks.append(new_disk)

  return disks


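# Example (illustrative): with default_vg == "xenvg", an opcode disk of
# {constants.IDISK_SIZE: "10240"} is normalized to
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}

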
def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  default_beparams = cluster.beparams[constants.PP_DEFAULT]
  for param, value in op.beparams.iteritems():
    if value == constants.VALUE_AUTO:
      op.beparams[param] = default_beparams[param]
  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(op.beparams)


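# Example (illustrative): if op.beparams == {constants.BE_VCPUS:
# constants.VALUE_AUTO}, the "auto" value is first replaced by the cluster
# default, then UpgradeBeParams and the type check run, and the returned
# dict carries every backend parameter with defaults filled in.

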
def _CheckOpportunisticLocking(op):
  """Generate error if opportunistic locking is not possible.

  """
  if op.opportunistic_locking and not op.iallocator:
    raise errors.OpPrereqError("Opportunistic locking is only available in"
                               " combination with an instance allocator",
                               errors.ECODE_INVAL)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      if self.op.disk_template != constants.DT_EXT:
        utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    _CheckOpportunisticLocking(self.op)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)

    self.needed_locks[locking.LEVEL_NODE_RES] = \
      _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    if self.op.opportunistic_locking:
      # Only consider nodes for which a lock is held
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
    else:
      node_whitelist = None

    #TODO Export network to iallocator so that it chooses a pnode
    #     in a nodegroup that has the desired network connected to
    req = _CreateInstanceAllocRequest(self.op, self.disks,
                                      self.nics, self.be_full,
                                      node_whitelist)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      # When opportunistic locks are used only a temporary failure is
      # generated
      if self.op.opportunistic_locking:
        ecode = errors.ECODE_TEMP_NORES
      else:
        ecode = errors.ECODE_NORES

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 ecode)

    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

    assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"

    if req.RequiredNodes() == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return (nl, nl)

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info

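  # The export information parsed above is an INI-style file; a minimal
  # sketch (section names follow constants.INISECT_*, the values are
  # invented for the example):
  #
  #   [export]
  #   version = 0
  #   os = debootstrap
  #
  #   [instance]
  #   name = inst1.example.com
  #   disk0_size = 10240
  #   nic0_mac = aa:00:00:dd:ee:ff
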
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in"
                                     " configuration file is not one of the"
                                     " allowed values: %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template"
                                   " information", errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]

    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]

    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
                                   errors.ECODE_STATE)
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

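  # Path construction example (illustrative): with a cluster file storage
  # directory of "/srv/ganeti/file-storage", op.file_storage_dir set to
  # "mysubdir" and an instance named "inst1.example.com", the disks end up
  # under "/srv/ganeti/file-storage/mysubdir/inst1.example.com".
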
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    else:
      self._old_instance_name = None

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" %
                                 (self.op.hypervisor, ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    self.be_full = _ComputeFullBeParams(self.op, cluster)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if necessary
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
                             self.proc.GetECId())

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = _ComputeDisks(self.op, default_vg)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      if self.op.instance_name == self._old_instance_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    keep_locks = filter(None, [self.op.pnode, self.op.snode,
                               self.op.src_node])
    _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
    _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Node locks differ from node resource locks"

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # Fill in any IPs from IP pools. This must happen here, because we need
    # to know the nic's primary node, as specified by the iallocator
    for idx, nic in enumerate(self.nics):
      net = nic.network
      if net is not None:
        netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
        if netparams is None:
          raise errors.OpPrereqError("No netparams found for network"
                                     " %s. Probably not connected to"
                                     " node's %s nodegroup" %
                                     (net, self.pnode.name),
                                     errors.ECODE_INVAL)
        self.LogInfo("NIC/%d inherits netparams %s" %
                     (idx, netparams.values()))
        nic.nicparams = dict(netparams)
        if nic.ip is not None:
          if nic.ip.lower() == constants.NIC_IP_POOL:
            try:
              nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
                                         " from the address pool" % idx,
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from network %s", nic.ip, net)
          else:
            try:
              self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP address %s already in use"
                                         " or does not belong to network"
                                         " %s" % (nic.ip, net),
                                         errors.ECODE_NOTUNIQUE)

      # net is None, ip None or given
      elif self.op.conflicts_check:
        _CheckForConflictingIp(self, nic.ip, self.pnode.name)

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
      nodes = [pnode]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(snode)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        raise errors.OpPrereqError("Disk template %s not supported with"
                                   " exclusive storage" %
                                   self.op.disk_template,
                                   errors.ECODE_STATE)

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                            group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      elif self.op.disk_template == constants.DT_EXT:
        # FIXME: Function that checks prereqs if needed
        pass
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for"
                                   " adoption", errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (utils.CommaJoin(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                            group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                 errors.ECODE_INVAL)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    #TODO: _CheckExtParams (remotely)
    # Check parameters for extstorage

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"
    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0],
                              a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as
    # we've added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might have not done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
    if self.op.mode == constants.INSTANCE_CREATE:
      if not self.op.no_install:
        pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                      not self.op.wait_for_sync)
        if pause_sync:
          feedback_fn("* pausing disk sync to install instance OS")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), True)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("pause-sync of instance %s for disk %d failed",
                           instance, idx)

        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        os_add_result = \
          self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                        self.op.debug_level)
        if pause_sync:
          feedback_fn("* resuming disk sync")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), False)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("resume-sync of instance %s for disk %d failed",
                           instance, idx)

        os_add_result.Raise("Could not add os for instance %s"
                            " on node %s" % (instance, pnode_name))

    elif self.op.mode == constants.INSTANCE_IMPORT:
      feedback_fn("* running the instance OS import scripts...")

      transfers = []

      for idx, image in enumerate(self.src_images):
        if not image:
          continue

        # FIXME: pass debug option from opcode to backend
        dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                           constants.IEIO_FILE, (image, ),
                                           constants.IEIO_SCRIPT,
                                           (iobj.disks[idx], idx),
                                           None)
        transfers.append(dt)

      import_result = \
        masterd.instance.TransferInstanceData(self, feedback_fn,
                                              self.op.src_node, pnode_name,
                                              self.pnode.secondary_ip,
                                              iobj, transfers)
      if not compat.all(import_result):
        self.LogWarning("Some disks for instance %s on node %s were not"
11093 " imported successfully" % (instance, pnode_name))
11095 rename_from = self._old_instance_name
11097 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11098 feedback_fn("* preparing remote import...")
11099 # The source cluster will stop the instance before attempting to make
11100 # a connection. In some cases stopping an instance can take a long
11101 # time, hence the shutdown timeout is added to the connection
11103 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11104 self.op.source_shutdown_timeout)
11105 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11107 assert iobj.primary_node == self.pnode.name
11109 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11110 self.source_x509_ca,
11111 self._cds, timeouts)
11112 if not compat.all(disk_results):
11113 # TODO: Should the instance still be started, even if some disks
11114 # failed to import (valid for local imports, too)?
11115 self.LogWarning("Some disks for instance %s on node %s were not"
11116 " imported successfully" % (instance, pnode_name))
11118 rename_from = self.source_instance_name
11121 # also checked in the prereq part
11122 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11125 # Run rename script on newly imported instance
11126 assert iobj.name == instance
11127 feedback_fn("Running rename script for %s" % instance)
11128 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11130 self.op.debug_level)
11131 if result.fail_msg:
11132 self.LogWarning("Failed to run rename script for %s on node"
11133 " %s: %s" % (instance, pnode_name, result.fail_msg))
11135 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11138 iobj.admin_state = constants.ADMINST_UP
11139 self.cfg.Update(iobj, feedback_fn)
11140 logging.info("Starting instance %s on node %s", instance, pnode_name)
11141 feedback_fn("* starting instance...")
11142 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11144 result.Raise("Could not start instance")
11146 return list(iobj.all_nodes)
11149 class LUInstanceMultiAlloc(NoHooksLU):
11150 """Allocates multiple instances at the same time.
11155 def CheckArguments(self):
11156 """Check arguments.
11160 for inst in self.op.instances:
11161 if inst.iallocator is not None:
11162 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11163 " instance objects", errors.ECODE_INVAL)
11164 nodes.append(bool(inst.pnode))
11165 if inst.disk_template in constants.DTS_INT_MIRROR:
11166 nodes.append(bool(inst.snode))
11168 has_nodes = compat.any(nodes)
11169 if compat.all(nodes) ^ has_nodes:
11170 raise errors.OpPrereqError("There are instance objects providing"
11171 " pnode/snode while others do not",
11172 errors.ECODE_INVAL)
11174 if self.op.iallocator is None:
11175 default_iallocator = self.cfg.GetDefaultIAllocator()
11176 if default_iallocator and has_nodes:
11177 self.op.iallocator = default_iallocator
11179 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11180 " given and no cluster-wide default"
11181 " iallocator found; please specify either"
11182 " an iallocator or nodes on the instances"
11183 " or set a cluster-wide default iallocator",
11184 errors.ECODE_INVAL)
11186 _CheckOpportunisticLocking(self.op)
11188 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11190 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11191 utils.CommaJoin(dups), errors.ECODE_INVAL)
11193 def ExpandNames(self):
11194 """Calculate the locks.
11197 self.share_locks = _ShareAll()
11198 self.needed_locks = {
11199 # iallocator will select nodes and even if no iallocator is used,
11200 # collisions with LUInstanceCreate should be avoided
11201 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11204 if self.op.iallocator:
11205 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11206 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11208 if self.op.opportunistic_locking:
11209 self.opportunistic_locks[locking.LEVEL_NODE] = True
11210 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11213 for inst in self.op.instances:
11214 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11215 nodeslist.append(inst.pnode)
11216 if inst.snode is not None:
11217 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11218 nodeslist.append(inst.snode)
11220 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11221 # Lock resources of instance's primary and secondary nodes (copy to
11222 # prevent accidential modification)
11223 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11225 def CheckPrereq(self):
11226 """Check prerequisite.
11229 cluster = self.cfg.GetClusterInfo()
11230 default_vg = self.cfg.GetVGName()
11231 ec_id = self.proc.GetECId()
11233 if self.op.opportunistic_locking:
11234 # Only consider nodes for which a lock is held
11235 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11237 node_whitelist = None
11239 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11240 _ComputeNics(op, cluster, None,
11242 _ComputeFullBeParams(op, cluster),
11244 for op in self.op.instances]
11246 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11247 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11249 ial.Run(self.op.iallocator)
11251 if not ial.success:
11252 raise errors.OpPrereqError("Can't compute nodes using"
11253 " iallocator '%s': %s" %
11254 (self.op.iallocator, ial.info),
11255 errors.ECODE_NORES)
11257 self.ia_result = ial.result
11259 if self.op.dry_run:
11260 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11261 constants.JOB_IDS_KEY: [],
11264 def _ConstructPartialResult(self):
11265 """Contructs the partial result.
11268 (allocatable, failed) = self.ia_result
11270 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11271 map(compat.fst, allocatable),
11272 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11275 def Exec(self, feedback_fn):
11276 """Executes the opcode.
11279 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11280 (allocatable, failed) = self.ia_result
11283 for (name, nodes) in allocatable:
11284 op = op2inst.pop(name)
11287 (op.pnode, op.snode) = nodes
11289 (op.pnode,) = nodes
11293 missing = set(op2inst.keys()) - set(failed)
11294 assert not missing, \
11295 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11297 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11300 def _CheckRADOSFreeSpace():
11301 """Compute disk size requirements inside the RADOS cluster.
11304 # For the RADOS cluster we assume there is always enough space.
11308 class LUInstanceConsole(NoHooksLU):
11309 """Connect to an instance's console.
11311 This is somewhat special in that it returns the command line that
11312 you need to run on the master node in order to connect to the
11318 def ExpandNames(self):
11319 self.share_locks = _ShareAll()
11320 self._ExpandAndLockInstance()
11322 def CheckPrereq(self):
11323 """Check prerequisites.
11325 This checks that the instance is in the cluster.
11328 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11329 assert self.instance is not None, \
11330 "Cannot retrieve locked instance %s" % self.op.instance_name
11331 _CheckNodeOnline(self, self.instance.primary_node)
11333 def Exec(self, feedback_fn):
11334 """Connect to the console of an instance
11337 instance = self.instance
11338 node = instance.primary_node
11340 node_insts = self.rpc.call_instance_list([node],
11341 [instance.hypervisor])[node]
11342 node_insts.Raise("Can't get node information from %s" % node)
11344 if instance.name not in node_insts.payload:
11345 if instance.admin_state == constants.ADMINST_UP:
11346 state = constants.INSTST_ERRORDOWN
11347 elif instance.admin_state == constants.ADMINST_DOWN:
11348 state = constants.INSTST_ADMINDOWN
11350 state = constants.INSTST_ADMINOFFLINE
11351 raise errors.OpExecError("Instance %s is not running (state %s)" %
11352 (instance.name, state))
11354 logging.debug("Connecting to console of %s on %s", instance.name, node)
11356 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11359 def _GetInstanceConsole(cluster, instance):
11360 """Returns console information for an instance.
11362 @type cluster: L{objects.Cluster}
11363 @type instance: L{objects.Instance}
11367 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11368 # beparams and hvparams are passed separately, to avoid editing the
11369 # instance and then saving the defaults in the instance itself.
11370 hvparams = cluster.FillHV(instance)
11371 beparams = cluster.FillBE(instance)
11372 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11374 assert console.instance == instance.name
11375 assert console.Validate()
11377 return console.ToDict()
11380 class LUInstanceReplaceDisks(LogicalUnit):
11381 """Replace the disks of an instance.
11384 HPATH = "mirrors-replace"
11385 HTYPE = constants.HTYPE_INSTANCE
11388 def CheckArguments(self):
11389 """Check arguments.
11392 remote_node = self.op.remote_node
11393 ialloc = self.op.iallocator
11394 if self.op.mode == constants.REPLACE_DISK_CHG:
11395 if remote_node is None and ialloc is None:
11396 raise errors.OpPrereqError("When changing the secondary either an"
11397 " iallocator script must be used or the"
11398 " new node given", errors.ECODE_INVAL)
11400 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11402 elif remote_node is not None or ialloc is not None:
11403 # Not replacing the secondary
11404 raise errors.OpPrereqError("The iallocator and new node options can"
11405 " only be used when changing the"
11406 " secondary node", errors.ECODE_INVAL)
11408 def ExpandNames(self):
11409 self._ExpandAndLockInstance()
11411 assert locking.LEVEL_NODE not in self.needed_locks
11412 assert locking.LEVEL_NODE_RES not in self.needed_locks
11413 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11415 assert self.op.iallocator is None or self.op.remote_node is None, \
11416 "Conflicting options"
11418 if self.op.remote_node is not None:
11419 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11421 # Warning: do not remove the locking of the new secondary here
11422 # unless DRBD8.AddChildren is changed to work in parallel;
11423 # currently it doesn't since parallel invocations of
11424 # FindUnusedMinor will conflict
11425 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11426 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11428 self.needed_locks[locking.LEVEL_NODE] = []
11429 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11431 if self.op.iallocator is not None:
11432 # iallocator will select a new node in the same group
11433 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11434 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11436 self.needed_locks[locking.LEVEL_NODE_RES] = []
11438 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11439 self.op.iallocator, self.op.remote_node,
11440 self.op.disks, self.op.early_release,
11441 self.op.ignore_ipolicy)
11443 self.tasklets = [self.replacer]
11445 def DeclareLocks(self, level):
11446 if level == locking.LEVEL_NODEGROUP:
11447 assert self.op.remote_node is None
11448 assert self.op.iallocator is not None
11449 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11451 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11452 # Lock all groups used by instance optimistically; this requires going
11453 # via the node before it's locked, requiring verification later on
11454 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11455 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11457 elif level == locking.LEVEL_NODE:
11458 if self.op.iallocator is not None:
11459 assert self.op.remote_node is None
11460 assert not self.needed_locks[locking.LEVEL_NODE]
11461 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11463 # Lock member nodes of all locked groups
11464 self.needed_locks[locking.LEVEL_NODE] = \
11466 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11467 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11469 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11471 self._LockInstancesNodes()
11473 elif level == locking.LEVEL_NODE_RES:
11475 self.needed_locks[locking.LEVEL_NODE_RES] = \
11476 self.needed_locks[locking.LEVEL_NODE]
11478 def BuildHooksEnv(self):
11479 """Build hooks env.
11481 This runs on the master, the primary and all the secondaries.
11484 instance = self.replacer.instance
11486 "MODE": self.op.mode,
11487 "NEW_SECONDARY": self.op.remote_node,
11488 "OLD_SECONDARY": instance.secondary_nodes[0],
11490 env.update(_BuildInstanceHookEnvByObject(self, instance))
11493 def BuildHooksNodes(self):
11494 """Build hooks nodes.
11497 instance = self.replacer.instance
11499 self.cfg.GetMasterNode(),
11500 instance.primary_node,
11502 if self.op.remote_node is not None:
11503 nl.append(self.op.remote_node)
11506 def CheckPrereq(self):
11507 """Check prerequisites.
11510 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11511 self.op.iallocator is None)
11513 # Verify if node group locks are still correct
11514 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11516 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11518 return LogicalUnit.CheckPrereq(self)
11521 class TLReplaceDisks(Tasklet):
11522 """Replaces disks for an instance.
11524 Note: Locking is not within the scope of this class.
11527 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11528 disks, early_release, ignore_ipolicy):
11529 """Initializes this class.
11532 Tasklet.__init__(self, lu)
11535 self.instance_name = instance_name
11537 self.iallocator_name = iallocator_name
11538 self.remote_node = remote_node
11540 self.early_release = early_release
11541 self.ignore_ipolicy = ignore_ipolicy
11544 self.instance = None
11545 self.new_node = None
11546 self.target_node = None
11547 self.other_node = None
11548 self.remote_node_info = None
11549 self.node_secondary_ip = None
11552 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11553 """Compute a new secondary node using an IAllocator.
11556 req = iallocator.IAReqRelocate(name=instance_name,
11557 relocate_from=list(relocate_from))
11558 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11560 ial.Run(iallocator_name)
11562 if not ial.success:
11563 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11564 " %s" % (iallocator_name, ial.info),
11565 errors.ECODE_NORES)
11567 remote_node_name = ial.result[0]
11569 lu.LogInfo("Selected new secondary for instance '%s': %s",
11570 instance_name, remote_node_name)
11572 return remote_node_name
11574 def _FindFaultyDisks(self, node_name):
11575 """Wrapper for L{_FindFaultyInstanceDisks}.
11578 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11581 def _CheckDisksActivated(self, instance):
11582 """Checks if the instance disks are activated.
11584 @param instance: The instance to check disks
11585 @return: True if they are activated, False otherwise
11588 nodes = instance.all_nodes
11590 for idx, dev in enumerate(instance.disks):
11592 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11593 self.cfg.SetDiskID(dev, node)
11595 result = _BlockdevFind(self, node, dev, instance)
11599 elif result.fail_msg or not result.payload:
11604 def CheckPrereq(self):
11605 """Check prerequisites.
11607 This checks that the instance is in the cluster.
11610 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11611 assert instance is not None, \
11612 "Cannot retrieve locked instance %s" % self.instance_name
11614 if instance.disk_template != constants.DT_DRBD8:
11615 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11616 " instances", errors.ECODE_INVAL)
11618 if len(instance.secondary_nodes) != 1:
11619 raise errors.OpPrereqError("The instance has a strange layout,"
11620 " expected one secondary but found %d" %
11621 len(instance.secondary_nodes),
11622 errors.ECODE_FAULT)
11624 instance = self.instance
11625 secondary_node = instance.secondary_nodes[0]
11627 if self.iallocator_name is None:
11628 remote_node = self.remote_node
11630 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11631 instance.name, instance.secondary_nodes)
11633 if remote_node is None:
11634 self.remote_node_info = None
11636 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11637 "Remote node '%s' is not locked" % remote_node
11639 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11640 assert self.remote_node_info is not None, \
11641 "Cannot retrieve locked node %s" % remote_node
11643 if remote_node == self.instance.primary_node:
11644 raise errors.OpPrereqError("The specified node is the primary node of"
11645 " the instance", errors.ECODE_INVAL)
11647 if remote_node == secondary_node:
11648 raise errors.OpPrereqError("The specified node is already the"
11649 " secondary node of the instance",
11650 errors.ECODE_INVAL)
11652 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11653 constants.REPLACE_DISK_CHG):
11654 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11655 errors.ECODE_INVAL)
11657 if self.mode == constants.REPLACE_DISK_AUTO:
11658 if not self._CheckDisksActivated(instance):
11659 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11660 " first" % self.instance_name,
11661 errors.ECODE_STATE)
11662 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11663 faulty_secondary = self._FindFaultyDisks(secondary_node)
11665 if faulty_primary and faulty_secondary:
11666 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11667 " one node and can not be repaired"
11668 " automatically" % self.instance_name,
11669 errors.ECODE_STATE)
11672 self.disks = faulty_primary
11673 self.target_node = instance.primary_node
11674 self.other_node = secondary_node
11675 check_nodes = [self.target_node, self.other_node]
11676 elif faulty_secondary:
11677 self.disks = faulty_secondary
11678 self.target_node = secondary_node
11679 self.other_node = instance.primary_node
11680 check_nodes = [self.target_node, self.other_node]
11686 # Non-automatic modes
11687 if self.mode == constants.REPLACE_DISK_PRI:
11688 self.target_node = instance.primary_node
11689 self.other_node = secondary_node
11690 check_nodes = [self.target_node, self.other_node]
11692 elif self.mode == constants.REPLACE_DISK_SEC:
11693 self.target_node = secondary_node
11694 self.other_node = instance.primary_node
11695 check_nodes = [self.target_node, self.other_node]
11697 elif self.mode == constants.REPLACE_DISK_CHG:
11698 self.new_node = remote_node
11699 self.other_node = instance.primary_node
11700 self.target_node = secondary_node
11701 check_nodes = [self.new_node, self.other_node]
11703 _CheckNodeNotDrained(self.lu, remote_node)
11704 _CheckNodeVmCapable(self.lu, remote_node)
11706 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11707 assert old_node_info is not None
11708 if old_node_info.offline and not self.early_release:
11709 # doesn't make sense to delay the release
11710 self.early_release = True
11711 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11712 " early-release mode", secondary_node)
11715 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11718 # If not specified all disks should be replaced
11720 self.disks = range(len(self.instance.disks))
11722 # TODO: This is ugly, but right now we can't distinguish between internal
11723 # submitted opcode and external one. We should fix that.
11724 if self.remote_node_info:
11725 # We change the node, lets verify it still meets instance policy
11726 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11727 cluster = self.cfg.GetClusterInfo()
11728 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11730 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11731 ignore=self.ignore_ipolicy)
11733 for node in check_nodes:
11734 _CheckNodeOnline(self.lu, node)
11736 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11739 if node_name is not None)
11741 # Release unneeded node and node resource locks
11742 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11743 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11744 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11746 # Release any owned node group
11747 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11749 # Check whether disks are valid
11750 for disk_idx in self.disks:
11751 instance.FindDisk(disk_idx)
11753 # Get secondary node IP addresses
11754 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11755 in self.cfg.GetMultiNodeInfo(touched_nodes))
11757 def Exec(self, feedback_fn):
11758 """Execute disk replacement.
11760 This dispatches the disk replacement to the appropriate handler.
11764 # Verify owned locks before starting operation
11765 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11766 assert set(owned_nodes) == set(self.node_secondary_ip), \
11767 ("Incorrect node locks, owning %s, expected %s" %
11768 (owned_nodes, self.node_secondary_ip.keys()))
11769 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11770 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11771 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11773 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11774 assert list(owned_instances) == [self.instance_name], \
11775 "Instance '%s' not locked" % self.instance_name
11777 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11778 "Should not own any node group lock at this point"
11781 feedback_fn("No disks need replacement for instance '%s'" %
11782 self.instance.name)
11785 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11786 (utils.CommaJoin(self.disks), self.instance.name))
11787 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11788 feedback_fn("Current seconary node: %s" %
11789 utils.CommaJoin(self.instance.secondary_nodes))
11791 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11793 # Activate the instance disks if we're replacing them on a down instance
11795 _StartInstanceDisks(self.lu, self.instance, True)
11798 # Should we replace the secondary node?
11799 if self.new_node is not None:
11800 fn = self._ExecDrbd8Secondary
11802 fn = self._ExecDrbd8DiskOnly
11804 result = fn(feedback_fn)
11806 # Deactivate the instance disks if we're replacing them on a
11809 _SafeShutdownInstanceDisks(self.lu, self.instance)
11811 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11814 # Verify owned locks
11815 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11816 nodes = frozenset(self.node_secondary_ip)
11817 assert ((self.early_release and not owned_nodes) or
11818 (not self.early_release and not (set(owned_nodes) - nodes))), \
11819 ("Not owning the correct locks, early_release=%s, owned=%r,"
11820 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11824 def _CheckVolumeGroup(self, nodes):
11825 self.lu.LogInfo("Checking volume groups")
11827 vgname = self.cfg.GetVGName()
11829 # Make sure volume group exists on all involved nodes
11830 results = self.rpc.call_vg_list(nodes)
11832 raise errors.OpExecError("Can't list volume groups on the nodes")
11835 res = results[node]
11836 res.Raise("Error checking node %s" % node)
11837 if vgname not in res.payload:
11838 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11841 def _CheckDisksExistence(self, nodes):
11842 # Check disk existence
11843 for idx, dev in enumerate(self.instance.disks):
11844 if idx not in self.disks:
11848 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11849 self.cfg.SetDiskID(dev, node)
11851 result = _BlockdevFind(self, node, dev, self.instance)
11853 msg = result.fail_msg
11854 if msg or not result.payload:
11856 msg = "disk not found"
11857 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11860 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11861 for idx, dev in enumerate(self.instance.disks):
11862 if idx not in self.disks:
11865 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11868 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11869 on_primary, ldisk=ldisk):
11870 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11871 " replace disks for instance %s" %
11872 (node_name, self.instance.name))
11874 def _CreateNewStorage(self, node_name):
11875 """Create new storage on the primary or secondary node.
11877 This is only used for same-node replaces, not for changing the
11878 secondary node, hence we don't want to modify the existing disk.
11883 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11884 for idx, dev in enumerate(disks):
11885 if idx not in self.disks:
11888 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11890 self.cfg.SetDiskID(dev, node_name)
11892 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11893 names = _GenerateUniqueNames(self.lu, lv_names)
11895 (data_disk, meta_disk) = dev.children
11896 vg_data = data_disk.logical_id[0]
11897 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11898 logical_id=(vg_data, names[0]),
11899 params=data_disk.params)
11900 vg_meta = meta_disk.logical_id[0]
11901 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11902 size=constants.DRBD_META_SIZE,
11903 logical_id=(vg_meta, names[1]),
11904 params=meta_disk.params)
11906 new_lvs = [lv_data, lv_meta]
11907 old_lvs = [child.Copy() for child in dev.children]
11908 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11909 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11911 # we pass force_create=True to force the LVM creation
11912 for new_lv in new_lvs:
11913 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11914 _GetInstanceInfoText(self.instance), False,
11919 def _CheckDevices(self, node_name, iv_names):
11920 for name, (dev, _, _) in iv_names.iteritems():
11921 self.cfg.SetDiskID(dev, node_name)
11923 result = _BlockdevFind(self, node_name, dev, self.instance)
11925 msg = result.fail_msg
11926 if msg or not result.payload:
11928 msg = "disk not found"
11929 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11932 if result.payload.is_degraded:
11933 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11935 def _RemoveOldStorage(self, node_name, iv_names):
11936 for name, (_, old_lvs, _) in iv_names.iteritems():
11937 self.lu.LogInfo("Remove logical volumes for %s", name)
11940 self.cfg.SetDiskID(lv, node_name)
11942 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11944 self.lu.LogWarning("Can't remove old LV: %s", msg,
11945 hint="remove unused LVs manually")
11947 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11948 """Replace a disk on the primary or secondary for DRBD 8.
11950 The algorithm for replace is quite complicated:
11952 1. for each disk to be replaced:
11954 1. create new LVs on the target node with unique names
11955 1. detach old LVs from the drbd device
11956 1. rename old LVs to name_replaced.<time_t>
11957 1. rename new LVs to old LVs
11958 1. attach the new LVs (with the old names now) to the drbd device
11960 1. wait for sync across all devices
11962 1. for each modified disk:
11964 1. remove old LVs (which have the name name_replaces.<time_t>)
11966 Failures are not very well handled.
11971 # Step: check device activation
11972 self.lu.LogStep(1, steps_total, "Check device existence")
11973 self._CheckDisksExistence([self.other_node, self.target_node])
11974 self._CheckVolumeGroup([self.target_node, self.other_node])
11976 # Step: check other node consistency
11977 self.lu.LogStep(2, steps_total, "Check peer consistency")
11978 self._CheckDisksConsistency(self.other_node,
11979 self.other_node == self.instance.primary_node,
11982 # Step: create new storage
11983 self.lu.LogStep(3, steps_total, "Allocate new storage")
11984 iv_names = self._CreateNewStorage(self.target_node)
11986 # Step: for each lv, detach+rename*2+attach
11987 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11988 for dev, old_lvs, new_lvs in iv_names.itervalues():
11989 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11991 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11993 result.Raise("Can't detach drbd from local storage on node"
11994 " %s for device %s" % (self.target_node, dev.iv_name))
11996 #cfg.Update(instance)
11998 # ok, we created the new LVs, so now we know we have the needed
11999 # storage; as such, we proceed on the target node to rename
12000 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12001 # using the assumption that logical_id == physical_id (which in
12002 # turn is the unique_id on that node)
12004 # FIXME(iustin): use a better name for the replaced LVs
12005 temp_suffix = int(time.time())
12006 ren_fn = lambda d, suff: (d.physical_id[0],
12007 d.physical_id[1] + "_replaced-%s" % suff)
12009 # Build the rename list based on what LVs exist on the node
12010 rename_old_to_new = []
12011 for to_ren in old_lvs:
12012 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12013 if not result.fail_msg and result.payload:
12015 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12017 self.lu.LogInfo("Renaming the old LVs on the target node")
12018 result = self.rpc.call_blockdev_rename(self.target_node,
12020 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12022 # Now we rename the new LVs to the old LVs
12023 self.lu.LogInfo("Renaming the new LVs on the target node")
12024 rename_new_to_old = [(new, old.physical_id)
12025 for old, new in zip(old_lvs, new_lvs)]
12026 result = self.rpc.call_blockdev_rename(self.target_node,
12028 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12030 # Intermediate steps of in memory modifications
12031 for old, new in zip(old_lvs, new_lvs):
12032 new.logical_id = old.logical_id
12033 self.cfg.SetDiskID(new, self.target_node)
12035 # We need to modify old_lvs so that removal later removes the
12036 # right LVs, not the newly added ones; note that old_lvs is a
12038 for disk in old_lvs:
12039 disk.logical_id = ren_fn(disk, temp_suffix)
12040 self.cfg.SetDiskID(disk, self.target_node)
12042 # Now that the new lvs have the old name, we can add them to the device
12043 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12044 result = self.rpc.call_blockdev_addchildren(self.target_node,
12045 (dev, self.instance), new_lvs)
12046 msg = result.fail_msg
12048 for new_lv in new_lvs:
12049 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12052 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12053 hint=("cleanup manually the unused logical"
12055 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12057 cstep = itertools.count(5)
12059 if self.early_release:
12060 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12061 self._RemoveOldStorage(self.target_node, iv_names)
12062 # TODO: Check if releasing locks early still makes sense
12063 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12065 # Release all resource locks except those used by the instance
12066 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12067 keep=self.node_secondary_ip.keys())
12069 # Release all node locks while waiting for sync
12070 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12072 # TODO: Can the instance lock be downgraded here? Take the optional disk
12073 # shutdown in the caller into consideration.
12076 # This can fail as the old devices are degraded and _WaitForSync
12077 # does a combined result over all disks, so we don't check its return value
12078 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12079 _WaitForSync(self.lu, self.instance)
12081 # Check all devices manually
12082 self._CheckDevices(self.instance.primary_node, iv_names)
12084 # Step: remove old storage
12085 if not self.early_release:
12086 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12087 self._RemoveOldStorage(self.target_node, iv_names)
12089 def _ExecDrbd8Secondary(self, feedback_fn):
12090 """Replace the secondary node for DRBD 8.
12092 The algorithm for replace is quite complicated:
12093 - for all disks of the instance:
12094 - create new LVs on the new node with same names
12095 - shutdown the drbd device on the old secondary
12096 - disconnect the drbd network on the primary
12097 - create the drbd device on the new secondary
12098 - network attach the drbd on the primary, using an artifice:
12099 the drbd code for Attach() will connect to the network if it
12100 finds a device which is connected to the good local disks but
12101 not network enabled
12102 - wait for sync across all devices
12103 - remove all disks from the old secondary
12105 Failures are not very well handled.
12110 pnode = self.instance.primary_node
12112 # Step: check device activation
12113 self.lu.LogStep(1, steps_total, "Check device existence")
12114 self._CheckDisksExistence([self.instance.primary_node])
12115 self._CheckVolumeGroup([self.instance.primary_node])
12117 # Step: check other node consistency
12118 self.lu.LogStep(2, steps_total, "Check peer consistency")
12119 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12121 # Step: create new storage
12122 self.lu.LogStep(3, steps_total, "Allocate new storage")
12123 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12124 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12125 for idx, dev in enumerate(disks):
12126 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12127 (self.new_node, idx))
12128 # we pass force_create=True to force LVM creation
12129 for new_lv in dev.children:
12130 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12131 True, _GetInstanceInfoText(self.instance), False,
12134 # Step 4: dbrd minors and drbd setups changes
12135 # after this, we must manually remove the drbd minors on both the
12136 # error and the success paths
12137 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12138 minors = self.cfg.AllocateDRBDMinor([self.new_node
12139 for dev in self.instance.disks],
12140 self.instance.name)
12141 logging.debug("Allocated minors %r", minors)
12144 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12145 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12146 (self.new_node, idx))
12147 # create new devices on new_node; note that we create two IDs:
12148 # one without port, so the drbd will be activated without
12149 # networking information on the new node at this stage, and one
12150 # with network, for the latter activation in step 4
12151 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12152 if self.instance.primary_node == o_node1:
12155 assert self.instance.primary_node == o_node2, "Three-node instance?"
12158 new_alone_id = (self.instance.primary_node, self.new_node, None,
12159 p_minor, new_minor, o_secret)
12160 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12161 p_minor, new_minor, o_secret)
12163 iv_names[idx] = (dev, dev.children, new_net_id)
12164 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12166 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12167 logical_id=new_alone_id,
12168 children=dev.children,
12171 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12174 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12176 _GetInstanceInfoText(self.instance), False,
12178 except errors.GenericError:
12179 self.cfg.ReleaseDRBDMinors(self.instance.name)
12182 # We have new devices, shutdown the drbd on the old secondary
12183 for idx, dev in enumerate(self.instance.disks):
12184 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12185 self.cfg.SetDiskID(dev, self.target_node)
12186 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12187 (dev, self.instance)).fail_msg
12189 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12190 "node: %s" % (idx, msg),
12191 hint=("Please cleanup this device manually as"
12192 " soon as possible"))
12194 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12195 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12196 self.instance.disks)[pnode]
12198 msg = result.fail_msg
12200 # detaches didn't succeed (unlikely)
12201 self.cfg.ReleaseDRBDMinors(self.instance.name)
12202 raise errors.OpExecError("Can't detach the disks from the network on"
12203 " old node: %s" % (msg,))
12205 # if we managed to detach at least one, we update all the disks of
12206 # the instance to point to the new secondary
12207 self.lu.LogInfo("Updating instance configuration")
12208 for dev, _, new_logical_id in iv_names.itervalues():
12209 dev.logical_id = new_logical_id
12210 self.cfg.SetDiskID(dev, self.instance.primary_node)
12212 self.cfg.Update(self.instance, feedback_fn)
12214 # Release all node locks (the configuration has been updated)
12215 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12217 # and now perform the drbd attach
12218 self.lu.LogInfo("Attaching primary drbds to new secondary"
12219 " (standalone => connected)")
12220 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12222 self.node_secondary_ip,
12223 (self.instance.disks, self.instance),
12224 self.instance.name,
12226 for to_node, to_result in result.items():
12227 msg = to_result.fail_msg
12229 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12231 hint=("please do a gnt-instance info to see the"
12232 " status of disks"))
12234 cstep = itertools.count(5)
12236 if self.early_release:
12237 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12238 self._RemoveOldStorage(self.target_node, iv_names)
12239 # TODO: Check if releasing locks early still makes sense
12240 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12242 # Release all resource locks except those used by the instance
12243 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12244 keep=self.node_secondary_ip.keys())
12246 # TODO: Can the instance lock be downgraded here? Take the optional disk
12247 # shutdown in the caller into consideration.
12250 # This can fail as the old devices are degraded and _WaitForSync
12251 # does a combined result over all disks, so we don't check its return value
12252 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12253 _WaitForSync(self.lu, self.instance)
12255 # Check all devices manually
12256 self._CheckDevices(self.instance.primary_node, iv_names)
12258 # Step: remove old storage
12259 if not self.early_release:
12260 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12261 self._RemoveOldStorage(self.target_node, iv_names)
12264 class LURepairNodeStorage(NoHooksLU):
12265 """Repairs the volume group on a node.
12270 def CheckArguments(self):
12271 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12273 storage_type = self.op.storage_type
12275 if (constants.SO_FIX_CONSISTENCY not in
12276 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12277 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12278 " repaired" % storage_type,
12279 errors.ECODE_INVAL)
12281 def ExpandNames(self):
12282 self.needed_locks = {
12283 locking.LEVEL_NODE: [self.op.node_name],
12286 def _CheckFaultyDisks(self, instance, node_name):
12287 """Ensure faulty disks abort the opcode or at least warn."""
12289 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12291 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12292 " node '%s'" % (instance.name, node_name),
12293 errors.ECODE_STATE)
12294 except errors.OpPrereqError, err:
12295 if self.op.ignore_consistency:
12296 self.LogWarning(str(err.args[0]))
12300 def CheckPrereq(self):
12301 """Check prerequisites.
12304 # Check whether any instance on this node has faulty disks
12305 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12306 if inst.admin_state != constants.ADMINST_UP:
12308 check_nodes = set(inst.all_nodes)
12309 check_nodes.discard(self.op.node_name)
12310 for inst_node_name in check_nodes:
12311 self._CheckFaultyDisks(inst, inst_node_name)
12313 def Exec(self, feedback_fn):
12314 feedback_fn("Repairing storage unit '%s' on %s ..." %
12315 (self.op.name, self.op.node_name))
12317 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12318 result = self.rpc.call_storage_execute(self.op.node_name,
12319 self.op.storage_type, st_args,
12321 constants.SO_FIX_CONSISTENCY)
12322 result.Raise("Failed to repair storage unit '%s' on %s" %
12323 (self.op.name, self.op.node_name))
12326 class LUNodeEvacuate(NoHooksLU):
12327 """Evacuates instances off a list of nodes.
12332 _MODE2IALLOCATOR = {
12333 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12334 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12335 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12337 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12338 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12339 constants.IALLOCATOR_NEVAC_MODES)
12341 def CheckArguments(self):
12342 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12344 def ExpandNames(self):
12345 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12347 if self.op.remote_node is not None:
12348 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12349 assert self.op.remote_node
12351 if self.op.remote_node == self.op.node_name:
12352 raise errors.OpPrereqError("Can not use evacuated node as a new"
12353 " secondary node", errors.ECODE_INVAL)
12355 if self.op.mode != constants.NODE_EVAC_SEC:
12356 raise errors.OpPrereqError("Without the use of an iallocator only"
12357 " secondary instances can be evacuated",
12358 errors.ECODE_INVAL)
12361 self.share_locks = _ShareAll()
12362 self.needed_locks = {
12363 locking.LEVEL_INSTANCE: [],
12364 locking.LEVEL_NODEGROUP: [],
12365 locking.LEVEL_NODE: [],
12368 # Determine nodes (via group) optimistically, needs verification once locks
12369 # have been acquired
12370 self.lock_nodes = self._DetermineNodes()
12372 def _DetermineNodes(self):
12373 """Gets the list of nodes to operate on.
12376 if self.op.remote_node is None:
12377 # Iallocator will choose any node(s) in the same group
12378 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12380 group_nodes = frozenset([self.op.remote_node])
12382 # Determine nodes to be locked
12383 return set([self.op.node_name]) | group_nodes
12385 def _DetermineInstances(self):
12386 """Builds list of instances to operate on.
12389 assert self.op.mode in constants.NODE_EVAC_MODES
12391 if self.op.mode == constants.NODE_EVAC_PRI:
12392 # Primary instances only
12393 inst_fn = _GetNodePrimaryInstances
12394 assert self.op.remote_node is None, \
12395 "Evacuating primary instances requires iallocator"
12396 elif self.op.mode == constants.NODE_EVAC_SEC:
12397 # Secondary instances only
12398 inst_fn = _GetNodeSecondaryInstances
12401 assert self.op.mode == constants.NODE_EVAC_ALL
12402 inst_fn = _GetNodeInstances
12403 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12405 raise errors.OpPrereqError("Due to an issue with the iallocator"
12406 " interface it is not possible to evacuate"
12407 " all instances at once; specify explicitly"
12408 " whether to evacuate primary or secondary"
12410 errors.ECODE_INVAL)
12412 return inst_fn(self.cfg, self.op.node_name)
12414 def DeclareLocks(self, level):
12415 if level == locking.LEVEL_INSTANCE:
12416 # Lock instances optimistically, needs verification once node and group
12417 # locks have been acquired
12418 self.needed_locks[locking.LEVEL_INSTANCE] = \
12419 set(i.name for i in self._DetermineInstances())
12421 elif level == locking.LEVEL_NODEGROUP:
12422 # Lock node groups for all potential target nodes optimistically, needs
12423 # verification once nodes have been acquired
12424 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12425 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12427 elif level == locking.LEVEL_NODE:
12428 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12430 def CheckPrereq(self):
12432 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12433 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12434 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12436 need_nodes = self._DetermineNodes()
12438 if not owned_nodes.issuperset(need_nodes):
12439 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12440 " locks were acquired, current nodes are"
12441 " are '%s', used to be '%s'; retry the"
12443 (self.op.node_name,
12444 utils.CommaJoin(need_nodes),
12445 utils.CommaJoin(owned_nodes)),
12446 errors.ECODE_STATE)
12448 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12449 if owned_groups != wanted_groups:
12450 raise errors.OpExecError("Node groups changed since locks were acquired,"
12451 " current groups are '%s', used to be '%s';"
12452 " retry the operation" %
12453 (utils.CommaJoin(wanted_groups),
12454 utils.CommaJoin(owned_groups)))
12456 # Determine affected instances
12457 self.instances = self._DetermineInstances()
12458 self.instance_names = [i.name for i in self.instances]
12460 if set(self.instance_names) != owned_instances:
12461 raise errors.OpExecError("Instances on node '%s' changed since locks"
12462 " were acquired, current instances are '%s',"
12463 " used to be '%s'; retry the operation" %
12464 (self.op.node_name,
12465 utils.CommaJoin(self.instance_names),
12466 utils.CommaJoin(owned_instances)))
12468 if self.instance_names:
12469 self.LogInfo("Evacuating instances from node '%s': %s",
12471 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12473 self.LogInfo("No instances to evacuate from node '%s'",
12476 if self.op.remote_node is not None:
12477 for i in self.instances:
12478 if i.primary_node == self.op.remote_node:
12479 raise errors.OpPrereqError("Node %s is the primary node of"
12480 " instance %s, cannot use it as"
12482 (self.op.remote_node, i.name),
12483 errors.ECODE_INVAL)
12485 def Exec(self, feedback_fn):
12486 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12488 if not self.instance_names:
12489 # No instances to evacuate
12492 elif self.op.iallocator is not None:
12493 # TODO: Implement relocation to other group
12494 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12495 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12496 instances=list(self.instance_names))
12497 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12499 ial.Run(self.op.iallocator)
12501 if not ial.success:
12502 raise errors.OpPrereqError("Can't compute node evacuation using"
12503 " iallocator '%s': %s" %
12504 (self.op.iallocator, ial.info),
12505 errors.ECODE_NORES)
12507 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12509 elif self.op.remote_node is not None:
12510 assert self.op.mode == constants.NODE_EVAC_SEC
12512 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12513 remote_node=self.op.remote_node,
12515 mode=constants.REPLACE_DISK_CHG,
12516 early_release=self.op.early_release)]
12517 for instance_name in self.instance_names]
12520 raise errors.ProgrammerError("No iallocator or remote node")
12522 return ResultWithJobs(jobs)
12525 def _SetOpEarlyRelease(early_release, op):
12526 """Sets C{early_release} flag on opcodes if available.
12530 op.early_release = early_release
12531 except AttributeError:
12532 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12537 def _NodeEvacDest(use_nodes, group, nodes):
12538 """Returns group or nodes depending on caller's choice.
12542 return utils.CommaJoin(nodes)
12547 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12548 """Unpacks the result of change-group and node-evacuate iallocator requests.
12550 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12551 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12553 @type lu: L{LogicalUnit}
12554 @param lu: Logical unit instance
12555 @type alloc_result: tuple/list
12556 @param alloc_result: Result from iallocator
12557 @type early_release: bool
12558 @param early_release: Whether to release locks early if possible
12559 @type use_nodes: bool
12560 @param use_nodes: Whether to display node names instead of groups
12563 (moved, failed, jobs) = alloc_result
12566 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12567 for (name, reason) in failed)
12568 lu.LogWarning("Unable to evacuate instances %s", failreason)
12569 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12572 lu.LogInfo("Instances to be moved: %s",
12573 utils.CommaJoin("%s (to %s)" %
12574 (name, _NodeEvacDest(use_nodes, group, nodes))
12575 for (name, group, nodes) in moved))
12577 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12578 map(opcodes.OpCode.LoadOpCode, ops))
12582 def _DiskSizeInBytesToMebibytes(lu, size):
12583 """Converts a disk size in bytes to mebibytes.
12585 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12588 (mib, remainder) = divmod(size, 1024 * 1024)
12591 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12592 " to not overwrite existing data (%s bytes will not be"
12593 " wiped)", (1024 * 1024) - remainder)
12599 class LUInstanceGrowDisk(LogicalUnit):
12600 """Grow a disk of an instance.
12603 HPATH = "disk-grow"
12604 HTYPE = constants.HTYPE_INSTANCE
12607 def ExpandNames(self):
12608 self._ExpandAndLockInstance()
12609 self.needed_locks[locking.LEVEL_NODE] = []
12610 self.needed_locks[locking.LEVEL_NODE_RES] = []
12611 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12612 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12614 def DeclareLocks(self, level):
12615 if level == locking.LEVEL_NODE:
12616 self._LockInstancesNodes()
12617 elif level == locking.LEVEL_NODE_RES:
12619 self.needed_locks[locking.LEVEL_NODE_RES] = \
12620 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12622 def BuildHooksEnv(self):
12623 """Build hooks env.
12625 This runs on the master, the primary and all the secondaries.
12629 "DISK": self.op.disk,
12630 "AMOUNT": self.op.amount,
12631 "ABSOLUTE": self.op.absolute,
12633 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12636 def BuildHooksNodes(self):
12637 """Build hooks nodes.
12640 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12643 def CheckPrereq(self):
12644 """Check prerequisites.
12646 This checks that the instance is in the cluster.
12649 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12650 assert instance is not None, \
12651 "Cannot retrieve locked instance %s" % self.op.instance_name
12652 nodenames = list(instance.all_nodes)
12653 for node in nodenames:
12654 _CheckNodeOnline(self, node)
12656 self.instance = instance
12658 if instance.disk_template not in constants.DTS_GROWABLE:
12659 raise errors.OpPrereqError("Instance's disk layout does not support"
12660 " growing", errors.ECODE_INVAL)
12662 self.disk = instance.FindDisk(self.op.disk)
12664 if self.op.absolute:
12665 self.target = self.op.amount
12666 self.delta = self.target - self.disk.size
12668 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12669 "current disk size (%s)" %
12670 (utils.FormatUnit(self.target, "h"),
12671 utils.FormatUnit(self.disk.size, "h")),
12672 errors.ECODE_STATE)
12674 self.delta = self.op.amount
12675 self.target = self.disk.size + self.delta
12677 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12678 utils.FormatUnit(self.delta, "h"),
12679 errors.ECODE_INVAL)
12681 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12683 def _CheckDiskSpace(self, nodenames, req_vgspace):
12684 template = self.instance.disk_template
12685 if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
12686 # TODO: check the free disk space for file, when that feature will be
12688 nodes = map(self.cfg.GetNodeInfo, nodenames)
12689 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12692 # With exclusive storage we need to something smarter than just looking
12693 # at free space; for now, let's simply abort the operation.
12694 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12695 " is enabled", errors.ECODE_STATE)
12696 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12698 def Exec(self, feedback_fn):
12699 """Execute disk grow.
12702 instance = self.instance
12705 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12706 assert (self.owned_locks(locking.LEVEL_NODE) ==
12707 self.owned_locks(locking.LEVEL_NODE_RES))
12709 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12711 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12713 raise errors.OpExecError("Cannot activate block device to grow")
12715 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12716 (self.op.disk, instance.name,
12717 utils.FormatUnit(self.delta, "h"),
12718 utils.FormatUnit(self.target, "h")))
12720 # First run all grow ops in dry-run mode
12721 for node in instance.all_nodes:
12722 self.cfg.SetDiskID(disk, node)
12723 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12725 result.Raise("Dry-run grow request failed to node %s" % node)
12728 # Get disk size from primary node for wiping
12729 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12730 result.Raise("Failed to retrieve disk size from node '%s'" %
12731 instance.primary_node)
12733 (disk_size_in_bytes, ) = result.payload
12735 if disk_size_in_bytes is None:
12736 raise errors.OpExecError("Failed to retrieve disk size from primary"
12737 " node '%s'" % instance.primary_node)
12739 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12741 assert old_disk_size >= disk.size, \
12742 ("Retrieved disk size too small (got %s, should be at least %s)" %
12743 (old_disk_size, disk.size))
12745 old_disk_size = None
12747 # We know that (as far as we can test) operations across different
12748 # nodes will succeed, time to run it for real on the backing storage
12749 for node in instance.all_nodes:
12750 self.cfg.SetDiskID(disk, node)
12751 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12753 result.Raise("Grow request failed to node %s" % node)
12755 # And now execute it for logical storage, on the primary node
12756 node = instance.primary_node
12757 self.cfg.SetDiskID(disk, node)
12758 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12760 result.Raise("Grow request failed to node %s" % node)
12762 disk.RecordGrow(self.delta)
12763 self.cfg.Update(instance, feedback_fn)
12765 # Changes have been recorded, release node lock
12766 _ReleaseLocks(self, locking.LEVEL_NODE)
12768 # Downgrade lock while waiting for sync
12769 self.glm.downgrade(locking.LEVEL_INSTANCE)
12771 assert wipe_disks ^ (old_disk_size is None)
12774 assert instance.disks[self.op.disk] == disk
12776 # Wipe newly added disk space
12777 _WipeDisks(self, instance,
12778 disks=[(self.op.disk, disk, old_disk_size)])
12780 if self.op.wait_for_sync:
12781 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12783 self.LogWarning("Disk syncing has not returned a good status; check"
12785 if instance.admin_state != constants.ADMINST_UP:
12786 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12787 elif instance.admin_state != constants.ADMINST_UP:
12788 self.LogWarning("Not shutting down the disk even if the instance is"
12789 " not supposed to be running because no wait for"
12790 " sync mode was requested")
12792 assert self.owned_locks(locking.LEVEL_NODE_RES)
12793 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12796 class LUInstanceQueryData(NoHooksLU):
12797 """Query runtime instance data.
12802 def ExpandNames(self):
12803 self.needed_locks = {}
12805 # Use locking if requested or when non-static information is wanted
12806 if not (self.op.static or self.op.use_locking):
12807 self.LogWarning("Non-static data requested, locks need to be acquired")
12808 self.op.use_locking = True
12810 if self.op.instances or not self.op.use_locking:
12811 # Expand instance names right here
12812 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12814 # Will use acquired locks
12815 self.wanted_names = None
12817 if self.op.use_locking:
12818 self.share_locks = _ShareAll()
12820 if self.wanted_names is None:
12821 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12823 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12825 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12826 self.needed_locks[locking.LEVEL_NODE] = []
12827 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12829 def DeclareLocks(self, level):
12830 if self.op.use_locking:
12831 if level == locking.LEVEL_NODEGROUP:
12832 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12834 # Lock all groups used by instances optimistically; this requires going
12835 # via the node before it's locked, requiring verification later on
12836 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12837 frozenset(group_uuid
12838 for instance_name in owned_instances
12840 self.cfg.GetInstanceNodeGroups(instance_name))
12842 elif level == locking.LEVEL_NODE:
12843 self._LockInstancesNodes()
12845 def CheckPrereq(self):
12846 """Check prerequisites.
12848 This only checks the optional instance list against the existing names.
12851 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12852 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12853 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12855 if self.wanted_names is None:
12856 assert self.op.use_locking, "Locking was not used"
12857 self.wanted_names = owned_instances
12859 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12861 if self.op.use_locking:
12862 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12865 assert not (owned_instances or owned_groups or owned_nodes)
12867 self.wanted_instances = instances.values()
12869 def _ComputeBlockdevStatus(self, node, instance, dev):
12870 """Returns the status of a block device
12873 if self.op.static or not node:
12876 self.cfg.SetDiskID(dev, node)
12878 result = self.rpc.call_blockdev_find(node, dev)
12882 result.Raise("Can't compute disk status for %s" % instance.name)
12884 status = result.payload
12888 return (status.dev_path, status.major, status.minor,
12889 status.sync_percent, status.estimated_time,
12890 status.is_degraded, status.ldisk_status)
12892 def _ComputeDiskStatus(self, instance, snode, dev):
12893 """Compute block device status.
12896 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12898 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12900 def _ComputeDiskStatusInner(self, instance, snode, dev):
12901 """Compute block device status.
12903 @attention: The device has to be annotated already.
12906 if dev.dev_type in constants.LDS_DRBD:
12907 # we change the snode then (otherwise we use the one passed in)
12908 if dev.logical_id[0] == instance.primary_node:
12909 snode = dev.logical_id[1]
12911 snode = dev.logical_id[0]
12913 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12915 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12918 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12925 "iv_name": dev.iv_name,
12926 "dev_type": dev.dev_type,
12927 "logical_id": dev.logical_id,
12928 "physical_id": dev.physical_id,
12929 "pstatus": dev_pstatus,
12930 "sstatus": dev_sstatus,
12931 "children": dev_children,
12936 def Exec(self, feedback_fn):
12937 """Gather and return data"""
12940 cluster = self.cfg.GetClusterInfo()
12942 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12943 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12945 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12946 for node in nodes.values()))
12948 group2name_fn = lambda uuid: groups[uuid].name
12950 for instance in self.wanted_instances:
12951 pnode = nodes[instance.primary_node]
12953 if self.op.static or pnode.offline:
12954 remote_state = None
12956 self.LogWarning("Primary node %s is marked offline, returning static"
12957 " information only for instance %s" %
12958 (pnode.name, instance.name))
12960 remote_info = self.rpc.call_instance_info(instance.primary_node,
12962 instance.hypervisor)
12963 remote_info.Raise("Error checking node %s" % instance.primary_node)
12964 remote_info = remote_info.payload
12965 if remote_info and "state" in remote_info:
12966 remote_state = "up"
12968 if instance.admin_state == constants.ADMINST_UP:
12969 remote_state = "down"
12971 remote_state = instance.admin_state
12973 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12976 snodes_group_uuids = [nodes[snode_name].group
12977 for snode_name in instance.secondary_nodes]
12979 result[instance.name] = {
12980 "name": instance.name,
12981 "config_state": instance.admin_state,
12982 "run_state": remote_state,
12983 "pnode": instance.primary_node,
12984 "pnode_group_uuid": pnode.group,
12985 "pnode_group_name": group2name_fn(pnode.group),
12986 "snodes": instance.secondary_nodes,
12987 "snodes_group_uuids": snodes_group_uuids,
12988 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12990 # this happens to be the same format used for hooks
12991 "nics": _NICListToTuple(self, instance.nics),
12992 "disk_template": instance.disk_template,
12994 "hypervisor": instance.hypervisor,
12995 "network_port": instance.network_port,
12996 "hv_instance": instance.hvparams,
12997 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12998 "be_instance": instance.beparams,
12999 "be_actual": cluster.FillBE(instance),
13000 "os_instance": instance.osparams,
13001 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
13002 "serial_no": instance.serial_no,
13003 "mtime": instance.mtime,
13004 "ctime": instance.ctime,
13005 "uuid": instance.uuid,
13011 def PrepareContainerMods(mods, private_fn):
13012 """Prepares a list of container modifications by adding a private data field.
13014 @type mods: list of tuples; (operation, index, parameters)
13015 @param mods: List of modifications
13016 @type private_fn: callable or None
13017 @param private_fn: Callable for constructing a private data field for a
13022 if private_fn is None:
13027 return [(op, idx, params, fn()) for (op, idx, params) in mods]
13030 #: Type description for changes as returned by L{ApplyContainerMods}'s
13032 _TApplyContModsCbChanges = \
13033 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13034 ht.TNonEmptyString,
13039 def ApplyContainerMods(kind, container, chgdesc, mods,
13040 create_fn, modify_fn, remove_fn):
13041 """Applies descriptions in C{mods} to C{container}.
13044 @param kind: One-word item description
13045 @type container: list
13046 @param container: Container to modify
13047 @type chgdesc: None or list
13048 @param chgdesc: List of applied changes
13050 @param mods: Modifications as returned by L{PrepareContainerMods}
13051 @type create_fn: callable
13052 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13053 receives absolute item index, parameters and private data object as added
13054 by L{PrepareContainerMods}, returns tuple containing new item and changes
13056 @type modify_fn: callable
13057 @param modify_fn: Callback for modifying an existing item
13058 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13059 and private data object as added by L{PrepareContainerMods}, returns
13061 @type remove_fn: callable
13062 @param remove_fn: Callback on removing item; receives absolute item index,
13063 item and private data object as added by L{PrepareContainerMods}
13066 for (op, idx, params, private) in mods:
13069 absidx = len(container) - 1
13071 raise IndexError("Not accepting negative indices other than -1")
13072 elif idx > len(container):
13073 raise IndexError("Got %s index %s, but there are only %s" %
13074 (kind, idx, len(container)))
13080 if op == constants.DDM_ADD:
13081 # Calculate where item will be added
13083 addidx = len(container)
13087 if create_fn is None:
13090 (item, changes) = create_fn(addidx, params, private)
13093 container.append(item)
13096 assert idx <= len(container)
13097 # list.insert does so before the specified index
13098 container.insert(idx, item)
13100 # Retrieve existing item
13102 item = container[absidx]
13104 raise IndexError("Invalid %s index %s" % (kind, idx))
13106 if op == constants.DDM_REMOVE:
13109 if remove_fn is not None:
13110 remove_fn(absidx, item, private)
13112 changes = [("%s/%s" % (kind, absidx), "remove")]
13114 assert container[absidx] == item
13115 del container[absidx]
13116 elif op == constants.DDM_MODIFY:
13117 if modify_fn is not None:
13118 changes = modify_fn(absidx, item, params, private)
13120 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13122 assert _TApplyContModsCbChanges(changes)
13124 if not (chgdesc is None or changes is None):
13125 chgdesc.extend(changes)
13128 def _UpdateIvNames(base_index, disks):
13129 """Updates the C{iv_name} attribute of disks.
13131 @type disks: list of L{objects.Disk}
13134 for (idx, disk) in enumerate(disks):
13135 disk.iv_name = "disk/%s" % (base_index + idx, )
13138 class _InstNicModPrivate:
13139 """Data structure for network interface modifications.
13141 Used by L{LUInstanceSetParams}.
13144 def __init__(self):
13149 class LUInstanceSetParams(LogicalUnit):
13150 """Modifies an instances's parameters.
13153 HPATH = "instance-modify"
13154 HTYPE = constants.HTYPE_INSTANCE
13158 def _UpgradeDiskNicMods(kind, mods, verify_fn):
13159 assert ht.TList(mods)
13160 assert not mods or len(mods[0]) in (2, 3)
13162 if mods and len(mods[0]) == 2:
13166 for op, params in mods:
13167 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13168 result.append((op, -1, params))
13172 raise errors.OpPrereqError("Only one %s add or remove operation is"
13173 " supported at a time" % kind,
13174 errors.ECODE_INVAL)
13176 result.append((constants.DDM_MODIFY, op, params))
13178 assert verify_fn(result)
13185 def _CheckMods(kind, mods, key_types, item_fn):
13186 """Ensures requested disk/NIC modifications are valid.
13189 for (op, _, params) in mods:
13190 assert ht.TDict(params)
13192 # If 'key_types' is an empty dict, we assume we have an
13193 # 'ext' template and thus do not ForceDictType
13195 utils.ForceDictType(params, key_types)
13197 if op == constants.DDM_REMOVE:
13199 raise errors.OpPrereqError("No settings should be passed when"
13200 " removing a %s" % kind,
13201 errors.ECODE_INVAL)
13202 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13203 item_fn(op, params)
13205 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13208 def _VerifyDiskModification(op, params):
13209 """Verifies a disk modification.
13212 if op == constants.DDM_ADD:
13213 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13214 if mode not in constants.DISK_ACCESS_SET:
13215 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13216 errors.ECODE_INVAL)
13218 size = params.get(constants.IDISK_SIZE, None)
13220 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13221 constants.IDISK_SIZE, errors.ECODE_INVAL)
13225 except (TypeError, ValueError), err:
13226 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13227 errors.ECODE_INVAL)
13229 params[constants.IDISK_SIZE] = size
13231 elif op == constants.DDM_MODIFY:
13232 if constants.IDISK_SIZE in params:
13233 raise errors.OpPrereqError("Disk size change not possible, use"
13234 " grow-disk", errors.ECODE_INVAL)
13235 if constants.IDISK_MODE not in params:
13236 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13237 " modification supported, but missing",
13238 errors.ECODE_NOENT)
13239 if len(params) > 1:
13240 raise errors.OpPrereqError("Disk modification doesn't support"
13241 " additional arbitrary parameters",
13242 errors.ECODE_INVAL)
13245 def _VerifyNicModification(op, params):
13246 """Verifies a network interface modification.
13249 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13250 ip = params.get(constants.INIC_IP, None)
13251 req_net = params.get(constants.INIC_NETWORK, None)
13252 link = params.get(constants.NIC_LINK, None)
13253 mode = params.get(constants.NIC_MODE, None)
13254 if req_net is not None:
13255 if req_net.lower() == constants.VALUE_NONE:
13256 params[constants.INIC_NETWORK] = None
13258 elif link is not None or mode is not None:
13259 raise errors.OpPrereqError("If network is given"
13260 " mode or link should not",
13261 errors.ECODE_INVAL)
13263 if op == constants.DDM_ADD:
13264 macaddr = params.get(constants.INIC_MAC, None)
13265 if macaddr is None:
13266 params[constants.INIC_MAC] = constants.VALUE_AUTO
13269 if ip.lower() == constants.VALUE_NONE:
13270 params[constants.INIC_IP] = None
13272 if ip.lower() == constants.NIC_IP_POOL:
13273 if op == constants.DDM_ADD and req_net is None:
13274 raise errors.OpPrereqError("If ip=pool, parameter network"
13276 errors.ECODE_INVAL)
13278 if not netutils.IPAddress.IsValid(ip):
13279 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13280 errors.ECODE_INVAL)
13282 if constants.INIC_MAC in params:
13283 macaddr = params[constants.INIC_MAC]
13284 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13285 macaddr = utils.NormalizeAndValidateMac(macaddr)
13287 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13288 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13289 " modifying an existing NIC",
13290 errors.ECODE_INVAL)
13292 def CheckArguments(self):
13293 if not (self.op.nics or self.op.disks or self.op.disk_template or
13294 self.op.hvparams or self.op.beparams or self.op.os_name or
13295 self.op.offline is not None or self.op.runtime_mem):
13296 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13298 if self.op.hvparams:
13299 _CheckGlobalHvParams(self.op.hvparams)
13301 self.op.disks = self._UpgradeDiskNicMods(
13302 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13303 self.op.nics = self._UpgradeDiskNicMods(
13304 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13306 if self.op.disks and self.op.disk_template is not None:
13307 raise errors.OpPrereqError("Disk template conversion and other disk"
13308 " changes not supported at the same time",
13309 errors.ECODE_INVAL)
13311 if (self.op.disk_template and
13312 self.op.disk_template in constants.DTS_INT_MIRROR and
13313 self.op.remote_node is None):
13314 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13315 " one requires specifying a secondary node",
13316 errors.ECODE_INVAL)
13318 # Check NIC modifications
13319 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13320 self._VerifyNicModification)
13322 def ExpandNames(self):
13323 self._ExpandAndLockInstance()
13324 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13325 # Can't even acquire node locks in shared mode as upcoming changes in
13326 # Ganeti 2.6 will start to modify the node object on disk conversion
13327 self.needed_locks[locking.LEVEL_NODE] = []
13328 self.needed_locks[locking.LEVEL_NODE_RES] = []
13329 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13330 # Look node group to look up the ipolicy
13331 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13333 def DeclareLocks(self, level):
13334 if level == locking.LEVEL_NODEGROUP:
13335 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13336 # Acquire locks for the instance's nodegroups optimistically. Needs
13337 # to be verified in CheckPrereq
13338 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13339 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13340 elif level == locking.LEVEL_NODE:
13341 self._LockInstancesNodes()
13342 if self.op.disk_template and self.op.remote_node:
13343 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13344 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13345 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13347 self.needed_locks[locking.LEVEL_NODE_RES] = \
13348 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13350 def BuildHooksEnv(self):
13351 """Build hooks env.
13353 This runs on the master, primary and secondaries.
13357 if constants.BE_MINMEM in self.be_new:
13358 args["minmem"] = self.be_new[constants.BE_MINMEM]
13359 if constants.BE_MAXMEM in self.be_new:
13360 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13361 if constants.BE_VCPUS in self.be_new:
13362 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13363 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13364 # information at all.
13366 if self._new_nics is not None:
13369 for nic in self._new_nics:
13370 n = copy.deepcopy(nic)
13371 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13372 n.nicparams = nicparams
13373 nics.append(_NICToTuple(self, n))
13375 args["nics"] = nics
13377 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13378 if self.op.disk_template:
13379 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13380 if self.op.runtime_mem:
13381 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13385 def BuildHooksNodes(self):
13386 """Build hooks nodes.
13389 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13392 def _PrepareNicModification(self, params, private, old_ip, old_net,
13393 old_params, cluster, pnode):
13395 update_params_dict = dict([(key, params[key])
13396 for key in constants.NICS_PARAMETERS
13399 req_link = update_params_dict.get(constants.NIC_LINK, None)
13400 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13402 new_net = params.get(constants.INIC_NETWORK, old_net)
13403 if new_net is not None:
13404 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
13405 if netparams is None:
13406 raise errors.OpPrereqError("No netparams found for the network"
13407 " %s, probably not connected" % new_net,
13408 errors.ECODE_INVAL)
13409 new_params = dict(netparams)
13411 new_params = _GetUpdatedParams(old_params, update_params_dict)
13413 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13415 new_filled_params = cluster.SimpleFillNIC(new_params)
13416 objects.NIC.CheckParameterSyntax(new_filled_params)
13418 new_mode = new_filled_params[constants.NIC_MODE]
13419 if new_mode == constants.NIC_MODE_BRIDGED:
13420 bridge = new_filled_params[constants.NIC_LINK]
13421 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13423 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13425 self.warn.append(msg)
13427 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13429 elif new_mode == constants.NIC_MODE_ROUTED:
13430 ip = params.get(constants.INIC_IP, old_ip)
13432 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13433 " on a routed NIC", errors.ECODE_INVAL)
13435 elif new_mode == constants.NIC_MODE_OVS:
13436 # TODO: check OVS link
13437 self.LogInfo("OVS links are currently not checked for correctness")
13439 if constants.INIC_MAC in params:
13440 mac = params[constants.INIC_MAC]
13442 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13443 errors.ECODE_INVAL)
13444 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13445 # otherwise generate the MAC address
13446 params[constants.INIC_MAC] = \
13447 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13449 # or validate/reserve the current one
13451 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13452 except errors.ReservationError:
13453 raise errors.OpPrereqError("MAC address '%s' already in use"
13454 " in cluster" % mac,
13455 errors.ECODE_NOTUNIQUE)
13456 elif new_net != old_net:
13458 def get_net_prefix(net):
13461 uuid = self.cfg.LookupNetwork(net)
13462 mac_prefix = self.cfg.GetNetwork(uuid).mac_prefix
13466 new_prefix = get_net_prefix(new_net)
13467 old_prefix = get_net_prefix(old_net)
13468 if old_prefix != new_prefix:
13469 params[constants.INIC_MAC] = \
13470 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13472 #if there is a change in nic-network configuration
13473 new_ip = params.get(constants.INIC_IP, old_ip)
13474 if (new_ip, new_net) != (old_ip, old_net):
13477 if new_ip.lower() == constants.NIC_IP_POOL:
13479 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13480 except errors.ReservationError:
13481 raise errors.OpPrereqError("Unable to get a free IP"
13482 " from the address pool",
13483 errors.ECODE_STATE)
13484 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13485 params[constants.INIC_IP] = new_ip
13486 elif new_ip != old_ip or new_net != old_net:
13488 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13489 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13490 except errors.ReservationError:
13491 raise errors.OpPrereqError("IP %s not available in network %s" %
13493 errors.ECODE_NOTUNIQUE)
13494 elif new_ip.lower() == constants.NIC_IP_POOL:
13495 raise errors.OpPrereqError("ip=pool, but no network found",
13496 errors.ECODE_INVAL)
13499 elif self.op.conflicts_check:
13500 _CheckForConflictingIp(self, new_ip, pnode)
13505 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13506 except errors.AddressPoolError:
13507 logging.warning("Release IP %s not contained in network %s",
13510 # there are no changes in (net, ip) tuple
13511 elif (old_net is not None and
13512 (req_link is not None or req_mode is not None)):
13513 raise errors.OpPrereqError("Not allowed to change link or mode of"
13514 " a NIC that is connected to a network",
13515 errors.ECODE_INVAL)
13517 private.params = new_params
13518 private.filled = new_filled_params
13520 def _PreCheckDiskTemplate(self, pnode_info):
13521 """CheckPrereq checks related to a new disk template."""
13522 # Arguments are passed to avoid configuration lookups
13523 instance = self.instance
13524 pnode = instance.primary_node
13525 cluster = self.cluster
13526 if instance.disk_template == self.op.disk_template:
13527 raise errors.OpPrereqError("Instance already has disk template %s" %
13528 instance.disk_template, errors.ECODE_INVAL)
13530 if (instance.disk_template,
13531 self.op.disk_template) not in self._DISK_CONVERSIONS:
13532 raise errors.OpPrereqError("Unsupported disk template conversion from"
13533 " %s to %s" % (instance.disk_template,
13534 self.op.disk_template),
13535 errors.ECODE_INVAL)
13536 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13537 msg="cannot change disk template")
13538 if self.op.disk_template in constants.DTS_INT_MIRROR:
13539 if self.op.remote_node == pnode:
13540 raise errors.OpPrereqError("Given new secondary node %s is the same"
13541 " as the primary node of the instance" %
13542 self.op.remote_node, errors.ECODE_STATE)
13543 _CheckNodeOnline(self, self.op.remote_node)
13544 _CheckNodeNotDrained(self, self.op.remote_node)
13545 # FIXME: here we assume that the old instance type is DT_PLAIN
13546 assert instance.disk_template == constants.DT_PLAIN
13547 disks = [{constants.IDISK_SIZE: d.size,
13548 constants.IDISK_VG: d.logical_id[0]}
13549 for d in instance.disks]
13550 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13551 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13553 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13554 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13555 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13557 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13558 ignore=self.op.ignore_ipolicy)
13559 if pnode_info.group != snode_info.group:
13560 self.LogWarning("The primary and secondary nodes are in two"
13561 " different node groups; the disk parameters"
13562 " from the first disk's node group will be"
13565 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13566 # Make sure none of the nodes require exclusive storage
13567 nodes = [pnode_info]
13568 if self.op.disk_template in constants.DTS_INT_MIRROR:
13570 nodes.append(snode_info)
13571 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13572 if compat.any(map(has_es, nodes)):
13573 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13574 " storage is enabled" % (instance.disk_template,
13575 self.op.disk_template))
13576 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13578 def CheckPrereq(self):
13579 """Check prerequisites.
13581 This only checks the instance list against the existing names.
13584 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13585 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13587 cluster = self.cluster = self.cfg.GetClusterInfo()
13588 assert self.instance is not None, \
13589 "Cannot retrieve locked instance %s" % self.op.instance_name
13591 pnode = instance.primary_node
13592 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13593 nodelist = list(instance.all_nodes)
13594 pnode_info = self.cfg.GetNodeInfo(pnode)
13595 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13597 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13598 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13599 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13601 # dictionary with instance information after the modification
13604 # Check disk modifications. This is done here and not in CheckArguments
13605 # (as with NICs), because we need to know the instance's disk template
13606 if instance.disk_template == constants.DT_EXT:
13607 self._CheckMods("disk", self.op.disks, {},
13608 self._VerifyDiskModification)
13610 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13611 self._VerifyDiskModification)
13613 # Prepare disk/NIC modifications
13614 self.diskmod = PrepareContainerMods(self.op.disks, None)
13615 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13617 # Check the validity of the `provider' parameter
13618 if instance.disk_template in constants.DT_EXT:
13619 for mod in self.diskmod:
13620 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13621 if mod[0] == constants.DDM_ADD:
13622 if ext_provider is None:
13623 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13624 " '%s' missing, during disk add" %
13626 constants.IDISK_PROVIDER),
13627 errors.ECODE_NOENT)
13628 elif mod[0] == constants.DDM_MODIFY:
13630 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13632 constants.IDISK_PROVIDER,
13633 errors.ECODE_INVAL)
13635 for mod in self.diskmod:
13636 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13637 if ext_provider is not None:
13638 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13639 " instances of type '%s'" %
13640 (constants.IDISK_PROVIDER,
13642 errors.ECODE_INVAL)
13645 if self.op.os_name and not self.op.force:
13646 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13647 self.op.force_variant)
13648 instance_os = self.op.os_name
13650 instance_os = instance.os
13652 assert not (self.op.disk_template and self.op.disks), \
13653 "Can't modify disk template and apply disk changes at the same time"
13655 if self.op.disk_template:
13656 self._PreCheckDiskTemplate(pnode_info)
13658 # hvparams processing
13659 if self.op.hvparams:
13660 hv_type = instance.hypervisor
13661 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13662 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13663 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13666 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13667 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13668 self.hv_proposed = self.hv_new = hv_new # the new actual values
13669 self.hv_inst = i_hvdict # the new dict (without defaults)
13671 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13673 self.hv_new = self.hv_inst = {}
13675 # beparams processing
13676 if self.op.beparams:
13677 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13679 objects.UpgradeBeParams(i_bedict)
13680 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13681 be_new = cluster.SimpleFillBE(i_bedict)
13682 self.be_proposed = self.be_new = be_new # the new actual values
13683 self.be_inst = i_bedict # the new dict (without defaults)
13685 self.be_new = self.be_inst = {}
13686 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13687 be_old = cluster.FillBE(instance)
13689 # CPU param validation -- checking every time a parameter is
13690 # changed to cover all cases where either CPU mask or vcpus have
13692 if (constants.BE_VCPUS in self.be_proposed and
13693 constants.HV_CPU_MASK in self.hv_proposed):
13695 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13696 # Verify mask is consistent with number of vCPUs. Can skip this
13697 # test if only 1 entry in the CPU mask, which means same mask
13698 # is applied to all vCPUs.
13699 if (len(cpu_list) > 1 and
13700 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13701 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13703 (self.be_proposed[constants.BE_VCPUS],
13704 self.hv_proposed[constants.HV_CPU_MASK]),
13705 errors.ECODE_INVAL)
13707 # Only perform this test if a new CPU mask is given
13708 if constants.HV_CPU_MASK in self.hv_new:
13709 # Calculate the largest CPU number requested
13710 max_requested_cpu = max(map(max, cpu_list))
13711 # Check that all of the instance's nodes have enough physical CPUs to
13712 # satisfy the requested CPU mask
13713 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13714 max_requested_cpu + 1, instance.hypervisor)
13716 # osparams processing
13717 if self.op.osparams:
13718 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13719 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13720 self.os_inst = i_osdict # the new dict (without defaults)
13726 #TODO(dynmem): do the appropriate check involving MINMEM
13727 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13728 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13729 mem_check_list = [pnode]
13730 if be_new[constants.BE_AUTO_BALANCE]:
13731 # either we changed auto_balance to yes or it was from before
13732 mem_check_list.extend(instance.secondary_nodes)
13733 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13734 instance.hypervisor)
13735 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13736 [instance.hypervisor], False)
13737 pninfo = nodeinfo[pnode]
13738 msg = pninfo.fail_msg
13740 # Assume the primary node is unreachable and go ahead
13741 self.warn.append("Can't get info from primary node %s: %s" %
13744 (_, _, (pnhvinfo, )) = pninfo.payload
13745 if not isinstance(pnhvinfo.get("memory_free", None), int):
13746 self.warn.append("Node data from primary node %s doesn't contain"
13747 " free memory information" % pnode)
13748 elif instance_info.fail_msg:
13749 self.warn.append("Can't get instance runtime information: %s" %
13750 instance_info.fail_msg)
13752 if instance_info.payload:
13753 current_mem = int(instance_info.payload["memory"])
13755 # Assume instance not running
13756 # (there is a slight race condition here, but it's not very
13757 # probable, and we have no other way to check)
13758 # TODO: Describe race condition
13760 #TODO(dynmem): do the appropriate check involving MINMEM
13761 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13762 pnhvinfo["memory_free"])
13764 raise errors.OpPrereqError("This change will prevent the instance"
13765 " from starting, due to %d MB of memory"
13766 " missing on its primary node" %
13767 miss_mem, errors.ECODE_NORES)
13769 if be_new[constants.BE_AUTO_BALANCE]:
13770 for node, nres in nodeinfo.items():
13771 if node not in instance.secondary_nodes:
13773 nres.Raise("Can't get info from secondary node %s" % node,
13774 prereq=True, ecode=errors.ECODE_STATE)
13775 (_, _, (nhvinfo, )) = nres.payload
13776 if not isinstance(nhvinfo.get("memory_free", None), int):
13777 raise errors.OpPrereqError("Secondary node %s didn't return free"
13778 " memory information" % node,
13779 errors.ECODE_STATE)
13780 #TODO(dynmem): do the appropriate check involving MINMEM
13781 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13782 raise errors.OpPrereqError("This change will prevent the instance"
13783 " from failover to its secondary node"
13784 " %s, due to not enough memory" % node,
13785 errors.ECODE_STATE)
13787 if self.op.runtime_mem:
13788 remote_info = self.rpc.call_instance_info(instance.primary_node,
13790 instance.hypervisor)
13791 remote_info.Raise("Error checking node %s" % instance.primary_node)
13792 if not remote_info.payload: # not running already
13793 raise errors.OpPrereqError("Instance %s is not running" %
13794 instance.name, errors.ECODE_STATE)
13796 current_memory = remote_info.payload["memory"]
13797 if (not self.op.force and
13798 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13799 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13800 raise errors.OpPrereqError("Instance %s must have memory between %d"
13801 " and %d MB of memory unless --force is"
13804 self.be_proposed[constants.BE_MINMEM],
13805 self.be_proposed[constants.BE_MAXMEM]),
13806 errors.ECODE_INVAL)
13808 delta = self.op.runtime_mem - current_memory
13810 _CheckNodeFreeMemory(self, instance.primary_node,
13811 "ballooning memory for instance %s" %
13812 instance.name, delta, instance.hypervisor)
13814 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13815 raise errors.OpPrereqError("Disk operations not supported for"
13816 " diskless instances", errors.ECODE_INVAL)
13818 def _PrepareNicCreate(_, params, private):
13819 self._PrepareNicModification(params, private, None, None,
13820 {}, cluster, pnode)
13821 return (None, None)
13823 def _PrepareNicMod(_, nic, params, private):
13824 self._PrepareNicModification(params, private, nic.ip, nic.network,
13825 nic.nicparams, cluster, pnode)
13828 def _PrepareNicRemove(_, params, __):
13830 net = params.network
13831 if net is not None and ip is not None:
13832 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13834 # Verify NIC changes (operating on copy)
13835 nics = instance.nics[:]
13836 ApplyContainerMods("NIC", nics, None, self.nicmod,
13837 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13838 if len(nics) > constants.MAX_NICS:
13839 raise errors.OpPrereqError("Instance has too many network interfaces"
13840 " (%d), cannot add more" % constants.MAX_NICS,
13841 errors.ECODE_STATE)
13843 # Verify disk changes (operating on a copy)
13844 disks = instance.disks[:]
13845 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13846 if len(disks) > constants.MAX_DISKS:
13847 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13848 " more" % constants.MAX_DISKS,
13849 errors.ECODE_STATE)
13850 disk_sizes = [disk.size for disk in instance.disks]
13851 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13852 self.diskmod if op == constants.DDM_ADD)
13853 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13854 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13856 if self.op.offline is not None and self.op.offline:
13857 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13858 msg="can't change to offline")
13860 # Pre-compute NIC changes (necessary to use result in hooks)
13861 self._nic_chgdesc = []
13863 # Operate on copies as this is still in prereq
13864 nics = [nic.Copy() for nic in instance.nics]
13865 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13866 self._CreateNewNic, self._ApplyNicMods, None)
13867 self._new_nics = nics
13868 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13870 self._new_nics = None
13871 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13873 if not self.op.ignore_ipolicy:
13874 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13877 # Fill ispec with backend parameters
13878 ispec[constants.ISPEC_SPINDLE_USE] = \
13879 self.be_new.get(constants.BE_SPINDLE_USE, None)
13880 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13883 # Copy ispec to verify parameters with min/max values separately
13884 ispec_max = ispec.copy()
13885 ispec_max[constants.ISPEC_MEM_SIZE] = \
13886 self.be_new.get(constants.BE_MAXMEM, None)
13887 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13888 ispec_min = ispec.copy()
13889 ispec_min[constants.ISPEC_MEM_SIZE] = \
13890 self.be_new.get(constants.BE_MINMEM, None)
13891 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13893 if (res_max or res_min):
13894 # FIXME: Improve error message by including information about whether
13895 # the upper or lower limit of the parameter fails the ipolicy.
13896 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13897 (group_info, group_info.name,
13898 utils.CommaJoin(set(res_max + res_min))))
13899 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13901 def _ConvertPlainToDrbd(self, feedback_fn):
13902 """Converts an instance from plain to drbd.
13905 feedback_fn("Converting template to drbd")
13906 instance = self.instance
13907 pnode = instance.primary_node
13908 snode = self.op.remote_node
13910 assert instance.disk_template == constants.DT_PLAIN
13912 # create a fake disk info for _GenerateDiskTemplate
13913 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13914 constants.IDISK_VG: d.logical_id[0]}
13915 for d in instance.disks]
13916 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13917 instance.name, pnode, [snode],
13918 disk_info, None, None, 0, feedback_fn,
13920 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13922 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13923 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13924 info = _GetInstanceInfoText(instance)
13925 feedback_fn("Creating additional volumes...")
13926 # first, create the missing data and meta devices
13927 for disk in anno_disks:
13928 # unfortunately this is... not too nice
13929 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13930 info, True, p_excl_stor)
13931 for child in disk.children:
13932 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13934 # at this stage, all new LVs have been created, we can rename the
13936 feedback_fn("Renaming original volumes...")
13937 rename_list = [(o, n.children[0].logical_id)
13938 for (o, n) in zip(instance.disks, new_disks)]
13939 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13940 result.Raise("Failed to rename original LVs")
13942 feedback_fn("Initializing DRBD devices...")
13943 # all child devices are in place, we can now create the DRBD devices
13944 for disk in anno_disks:
13945 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13946 f_create = node == pnode
13947 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13950 # at this point, the instance has been modified
13951 instance.disk_template = constants.DT_DRBD8
13952 instance.disks = new_disks
13953 self.cfg.Update(instance, feedback_fn)
13955 # Release node locks while waiting for sync
13956 _ReleaseLocks(self, locking.LEVEL_NODE)
13958 # disks are created, waiting for sync
13959 disk_abort = not _WaitForSync(self, instance,
13960 oneshot=not self.op.wait_for_sync)
13962 raise errors.OpExecError("There are some degraded disks for"
13963 " this instance, please cleanup manually")
13965 # Node resource locks will be released by caller
13967 def _ConvertDrbdToPlain(self, feedback_fn):
13968 """Converts an instance from drbd to plain.
13971 instance = self.instance
13973 assert len(instance.secondary_nodes) == 1
13974 assert instance.disk_template == constants.DT_DRBD8
13976 pnode = instance.primary_node
13977 snode = instance.secondary_nodes[0]
13978 feedback_fn("Converting template to plain")
13980 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13981 new_disks = [d.children[0] for d in instance.disks]
13983 # copy over size and mode
13984 for parent, child in zip(old_disks, new_disks):
13985 child.size = parent.size
13986 child.mode = parent.mode
13988 # this is a DRBD disk, return its port to the pool
13989 # NOTE: this must be done right before the call to cfg.Update!
13990 for disk in old_disks:
13991 tcp_port = disk.logical_id[2]
13992 self.cfg.AddTcpUdpPort(tcp_port)
13994 # update instance structure
13995 instance.disks = new_disks
13996 instance.disk_template = constants.DT_PLAIN
13997 self.cfg.Update(instance, feedback_fn)
13999 # Release locks in case removing disks takes a while
14000 _ReleaseLocks(self, locking.LEVEL_NODE)
14002 feedback_fn("Removing volumes on the secondary node...")
14003 for disk in old_disks:
14004 self.cfg.SetDiskID(disk, snode)
14005 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14007 self.LogWarning("Could not remove block device %s on node %s,"
14008 " continuing anyway: %s", disk.iv_name, snode, msg)
14010 feedback_fn("Removing unneeded volumes on the primary node...")
14011 for idx, disk in enumerate(old_disks):
14012 meta = disk.children[1]
14013 self.cfg.SetDiskID(meta, pnode)
14014 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14016 self.LogWarning("Could not remove metadata for disk %d on node %s,"
14017 " continuing anyway: %s", idx, pnode, msg)
14019 def _CreateNewDisk(self, idx, params, _):
14020 """Creates a new disk.
14023 instance = self.instance
14026 if instance.disk_template in constants.DTS_FILEBASED:
14027 (file_driver, file_path) = instance.disks[0].logical_id
14028 file_path = os.path.dirname(file_path)
14030 file_driver = file_path = None
14033 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14034 instance.primary_node, instance.secondary_nodes,
14035 [params], file_path, file_driver, idx,
14036 self.Log, self.diskparams)[0]
14038 info = _GetInstanceInfoText(instance)
14040 logging.info("Creating volume %s for instance %s",
14041 disk.iv_name, instance.name)
14042 # Note: this needs to be kept in sync with _CreateDisks
14044 for node in instance.all_nodes:
14045 f_create = (node == instance.primary_node)
14047 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14048 except errors.OpExecError, err:
14049 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14050 disk.iv_name, disk, node, err)
14053 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14057 def _ModifyDisk(idx, disk, params, _):
14058 """Modifies a disk.
14061 disk.mode = params[constants.IDISK_MODE]
14064 ("disk.mode/%d" % idx, disk.mode),
14067 def _RemoveDisk(self, idx, root, _):
14071 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14072 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14073 self.cfg.SetDiskID(disk, node)
14074 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14076 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14077 " continuing anyway", idx, node, msg)
14079 # if this is a DRBD disk, return its port to the pool
14080 if root.dev_type in constants.LDS_DRBD:
14081 self.cfg.AddTcpUdpPort(root.logical_id[2])
14084 def _CreateNewNic(idx, params, private):
14085 """Creates data structure for a new network interface.
14088 mac = params[constants.INIC_MAC]
14089 ip = params.get(constants.INIC_IP, None)
14090 net = params.get(constants.INIC_NETWORK, None)
14091 #TODO: not private.filled?? can a nic have no nicparams??
14092 nicparams = private.filled
14094 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
14096 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14097 (mac, ip, private.filled[constants.NIC_MODE],
14098 private.filled[constants.NIC_LINK],
14103 def _ApplyNicMods(idx, nic, params, private):
14104 """Modifies a network interface.
14109 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
14111 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14112 setattr(nic, key, params[key])
14115 nic.nicparams = private.filled
14117 for (key, val) in nic.nicparams.items():
14118 changes.append(("nic.%s/%d" % (key, idx), val))
14122 def Exec(self, feedback_fn):
14123 """Modifies an instance.
14125 All parameters take effect only at the next restart of the instance.
14128 # Process here the warnings from CheckPrereq, as we don't have a
14129 # feedback_fn there.
14130 # TODO: Replace with self.LogWarning
14131 for warn in self.warn:
14132 feedback_fn("WARNING: %s" % warn)
14134 assert ((self.op.disk_template is None) ^
14135 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14136 "Not owning any node resource locks"
14139 instance = self.instance
14142 if self.op.runtime_mem:
14143 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14145 self.op.runtime_mem)
14146 rpcres.Raise("Cannot modify instance runtime memory")
14147 result.append(("runtime_memory", self.op.runtime_mem))
14149 # Apply disk changes
14150 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14151 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14152 _UpdateIvNames(0, instance.disks)
14154 if self.op.disk_template:
14156 check_nodes = set(instance.all_nodes)
14157 if self.op.remote_node:
14158 check_nodes.add(self.op.remote_node)
14159 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14160 owned = self.owned_locks(level)
14161 assert not (check_nodes - owned), \
14162 ("Not owning the correct locks, owning %r, expected at least %r" %
14163 (owned, check_nodes))
14165 r_shut = _ShutdownInstanceDisks(self, instance)
14167 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14168 " proceed with disk template conversion")
14169 mode = (instance.disk_template, self.op.disk_template)
14171 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14173 self.cfg.ReleaseDRBDMinors(instance.name)
14175 result.append(("disk_template", self.op.disk_template))
14177 assert instance.disk_template == self.op.disk_template, \
14178 ("Expected disk template '%s', found '%s'" %
14179 (self.op.disk_template, instance.disk_template))
14181 # Release node and resource locks if there are any (they might already have
14182 # been released during disk conversion)
14183 _ReleaseLocks(self, locking.LEVEL_NODE)
14184 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14186 # Apply NIC changes
14187 if self._new_nics is not None:
14188 instance.nics = self._new_nics
14189 result.extend(self._nic_chgdesc)
14192 if self.op.hvparams:
14193 instance.hvparams = self.hv_inst
14194 for key, val in self.op.hvparams.iteritems():
14195 result.append(("hv/%s" % key, val))
14198 if self.op.beparams:
14199 instance.beparams = self.be_inst
14200 for key, val in self.op.beparams.iteritems():
14201 result.append(("be/%s" % key, val))
14204 if self.op.os_name:
14205 instance.os = self.op.os_name
14208 if self.op.osparams:
14209 instance.osparams = self.os_inst
14210 for key, val in self.op.osparams.iteritems():
14211 result.append(("os/%s" % key, val))
14213 if self.op.offline is None:
14216 elif self.op.offline:
14217 # Mark instance as offline
14218 self.cfg.MarkInstanceOffline(instance.name)
14219 result.append(("admin_state", constants.ADMINST_OFFLINE))
14221 # Mark instance as online, but stopped
14222 self.cfg.MarkInstanceDown(instance.name)
14223 result.append(("admin_state", constants.ADMINST_DOWN))
14225 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14227 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14228 self.owned_locks(locking.LEVEL_NODE)), \
14229 "All node locks should have been released by now"
14233 _DISK_CONVERSIONS = {
14234 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14235 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14239 class LUInstanceChangeGroup(LogicalUnit):
14240 HPATH = "instance-change-group"
14241 HTYPE = constants.HTYPE_INSTANCE
14244 def ExpandNames(self):
14245 self.share_locks = _ShareAll()
14247 self.needed_locks = {
14248 locking.LEVEL_NODEGROUP: [],
14249 locking.LEVEL_NODE: [],
14250 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14253 self._ExpandAndLockInstance()
14255 if self.op.target_groups:
14256 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14257 self.op.target_groups)
14259 self.req_target_uuids = None
14261 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14263 def DeclareLocks(self, level):
14264 if level == locking.LEVEL_NODEGROUP:
14265 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14267 if self.req_target_uuids:
14268 lock_groups = set(self.req_target_uuids)
14270 # Lock all groups used by instance optimistically; this requires going
14271 # via the node before it's locked, requiring verification later on
14272 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14273 lock_groups.update(instance_groups)
14275 # No target groups, need to lock all of them
14276 lock_groups = locking.ALL_SET
14278 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14280 elif level == locking.LEVEL_NODE:
14281 if self.req_target_uuids:
14282 # Lock all nodes used by instances
14283 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14284 self._LockInstancesNodes()
14286 # Lock all nodes in all potential target groups
14287 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14288 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14289 member_nodes = [node_name
14290 for group in lock_groups
14291 for node_name in self.cfg.GetNodeGroup(group).members]
14292 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14294 # Lock all nodes as all groups are potential targets
14295 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14297 def CheckPrereq(self):
14298 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14299 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14300 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14302 assert (self.req_target_uuids is None or
14303 owned_groups.issuperset(self.req_target_uuids))
14304 assert owned_instances == set([self.op.instance_name])
14306 # Get instance information
14307 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14309 # Check if node groups for locked instance are still correct
14310 assert owned_nodes.issuperset(self.instance.all_nodes), \
14311 ("Instance %s's nodes changed while we kept the lock" %
14312 self.op.instance_name)
14314 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14317 if self.req_target_uuids:
14318 # User requested specific target groups
14319 self.target_uuids = frozenset(self.req_target_uuids)
14321 # All groups except those used by the instance are potential targets
14322 self.target_uuids = owned_groups - inst_groups
14324 conflicting_groups = self.target_uuids & inst_groups
14325 if conflicting_groups:
14326 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14327 " used by the instance '%s'" %
14328 (utils.CommaJoin(conflicting_groups),
14329 self.op.instance_name),
14330 errors.ECODE_INVAL)
14332 if not self.target_uuids:
14333 raise errors.OpPrereqError("There are no possible target groups",
14334 errors.ECODE_INVAL)
14336 def BuildHooksEnv(self):
14337 """Build hooks env.
14340 assert self.target_uuids
14343 "TARGET_GROUPS": " ".join(self.target_uuids),
14346 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14350 def BuildHooksNodes(self):
14351 """Build hooks nodes.
14354 mn = self.cfg.GetMasterNode()
14355 return ([mn], [mn])
14357 def Exec(self, feedback_fn):
14358 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14360 assert instances == [self.op.instance_name], "Instance not locked"
14362 req = iallocator.IAReqGroupChange(instances=instances,
14363 target_groups=list(self.target_uuids))
14364 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14366 ial.Run(self.op.iallocator)
14368 if not ial.success:
14369 raise errors.OpPrereqError("Can't compute solution for changing group of"
14370 " instance '%s' using iallocator '%s': %s" %
14371 (self.op.instance_name, self.op.iallocator,
14372 ial.info), errors.ECODE_NORES)
14374 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14376 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14377 " instance '%s'", len(jobs), self.op.instance_name)
14379 return ResultWithJobs(jobs)
14382 class LUBackupQuery(NoHooksLU):
14383 """Query the exports list
14388 def CheckArguments(self):
14389 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14390 ["node", "export"], self.op.use_locking)
14392 def ExpandNames(self):
14393 self.expq.ExpandNames(self)
14395 def DeclareLocks(self, level):
14396 self.expq.DeclareLocks(self, level)
14398 def Exec(self, feedback_fn):
14401 for (node, expname) in self.expq.OldStyleQuery(self):
14402 if expname is None:
14403 result[node] = False
14405 result.setdefault(node, []).append(expname)
14410 class _ExportQuery(_QueryBase):
14411 FIELDS = query.EXPORT_FIELDS
14413 #: The node name is not a unique key for this query
14414 SORT_FIELD = "node"
14416 def ExpandNames(self, lu):
14417 lu.needed_locks = {}
14419 # The following variables interact with _QueryBase._GetNames
14420 if self.names:
14421 self.wanted = _GetWantedNodes(lu, self.names)
14422 else:
14423 self.wanted = locking.ALL_SET
14425 self.do_locking = self.use_locking
14427 if self.do_locking:
14428 lu.share_locks = _ShareAll()
14429 lu.needed_locks = {
14430 locking.LEVEL_NODE: self.wanted,
14431 }
14434 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14436 def DeclareLocks(self, lu, level):
14437 pass
14439 def _GetQueryData(self, lu):
14440 """Computes the list of nodes and their attributes.
14442 """
14443 # Locking is not used
14445 assert not (compat.any(lu.glm.is_owned(level)
14446 for level in locking.LEVELS
14447 if level != locking.LEVEL_CLUSTER) or
14448 self.do_locking or self.use_locking)
14450 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14452 result = []
14454 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14455 if nres.fail_msg:
14456 result.append((node, None))
14457 else:
14458 result.extend((node, expname) for expname in nres.payload)
14460 return result
14463 class LUBackupPrepare(NoHooksLU):
14464 """Prepares an instance for an export and returns useful information.
14466 """
14467 REQ_BGL = False
14469 def ExpandNames(self):
14470 self._ExpandAndLockInstance()
14472 def CheckPrereq(self):
14473 """Check prerequisites.
14475 """
14476 instance_name = self.op.instance_name
14478 self.instance = self.cfg.GetInstanceInfo(instance_name)
14479 assert self.instance is not None, \
14480 "Cannot retrieve locked instance %s" % self.op.instance_name
14481 _CheckNodeOnline(self, self.instance.primary_node)
14483 self._cds = _GetClusterDomainSecret()
14485 def Exec(self, feedback_fn):
14486 """Prepares an instance for an export.
14488 """
14489 instance = self.instance
14491 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14492 salt = utils.GenerateSecret(8)
14494 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14495 result = self.rpc.call_x509_cert_create(instance.primary_node,
14496 constants.RIE_CERT_VALIDITY)
14497 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14499 (name, cert_pem) = result.payload
14501 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14502 cert_pem)
14504 return {
14505 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14506 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14507 salt),
14508 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14509 }
14511 return None
14514 class LUBackupExport(LogicalUnit):
14515 """Export an instance to an image in the cluster.
14517 """
14518 HPATH = "instance-export"
14519 HTYPE = constants.HTYPE_INSTANCE
14521 REQ_BGL = False
14522 def CheckArguments(self):
14523 """Check the arguments.
14525 """
14526 self.x509_key_name = self.op.x509_key_name
14527 self.dest_x509_ca_pem = self.op.destination_x509_ca
14529 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14530 if not self.x509_key_name:
14531 raise errors.OpPrereqError("Missing X509 key name for encryption",
14532 errors.ECODE_INVAL)
14534 if not self.dest_x509_ca_pem:
14535 raise errors.OpPrereqError("Missing destination X509 CA",
14536 errors.ECODE_INVAL)
14538 def ExpandNames(self):
14539 self._ExpandAndLockInstance()
14541 # Lock all nodes for local exports
14542 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14543 # FIXME: lock only instance primary and destination node
14545 # Sad but true, for now we have to lock all nodes, as we don't know where
14546 # the previous export might be, and in this LU we search for it and
14547 # remove it from its current node. In the future we could fix this by:
14548 # - making a tasklet to search (share-lock all), then create the
14549 # new one, then one to remove, after
14550 # - removing the removal operation altogether
14551 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14553 # Allocations should be stopped while this LU runs with node locks, but
14554 # it doesn't have to be exclusive
14555 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14556 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14558 def DeclareLocks(self, level):
14559 """Last minute lock declaration."""
14560 # All nodes are locked anyway, so nothing to do here.
14562 def BuildHooksEnv(self):
14563 """Build hooks env.
14565 This will run on the master, primary node and target node.
14567 """
14568 env = {
14569 "EXPORT_MODE": self.op.mode,
14570 "EXPORT_NODE": self.op.target_node,
14571 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14572 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14573 # TODO: Generic function for boolean env variables
14574 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14575 }
14577 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14579 return env
14581 def BuildHooksNodes(self):
14582 """Build hooks nodes.
14584 """
14585 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14587 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14588 nl.append(self.op.target_node)
14590 return (nl, nl)
14592 def CheckPrereq(self):
14593 """Check prerequisites.
14595 This checks that the instance and node names are valid.
14597 """
14598 instance_name = self.op.instance_name
14600 self.instance = self.cfg.GetInstanceInfo(instance_name)
14601 assert self.instance is not None, \
14602 "Cannot retrieve locked instance %s" % self.op.instance_name
14603 _CheckNodeOnline(self, self.instance.primary_node)
14605 if (self.op.remove_instance and
14606 self.instance.admin_state == constants.ADMINST_UP and
14607 not self.op.shutdown):
14608 raise errors.OpPrereqError("Can not remove instance without shutting it"
14609 " down before", errors.ECODE_STATE)
14611 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14612 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14613 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14614 assert self.dst_node is not None
14616 _CheckNodeOnline(self, self.dst_node.name)
14617 _CheckNodeNotDrained(self, self.dst_node.name)
14619 self._cds = None
14620 self.dest_disk_info = None
14621 self.dest_x509_ca = None
14623 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14624 self.dst_node = None
14626 if len(self.op.target_node) != len(self.instance.disks):
14627 raise errors.OpPrereqError(("Received destination information for %s"
14628 " disks, but instance %s has %s disks") %
14629 (len(self.op.target_node), instance_name,
14630 len(self.instance.disks)),
14631 errors.ECODE_INVAL)
14633 cds = _GetClusterDomainSecret()
14635 # Check X509 key name
14636 try:
14637 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14638 except (TypeError, ValueError), err:
14639 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14640 errors.ECODE_INVAL)
14642 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14643 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14644 errors.ECODE_INVAL)
14646 # Load and verify CA
14647 try:
14648 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14649 except OpenSSL.crypto.Error, err:
14650 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14651 (err, ), errors.ECODE_INVAL)
14653 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14654 if errcode is not None:
14655 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14656 (msg, ), errors.ECODE_INVAL)
14658 self.dest_x509_ca = cert
14660 # Verify target information
14661 disk_info = []
14662 for idx, disk_data in enumerate(self.op.target_node):
14663 try:
14664 (host, port, magic) = \
14665 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14666 except errors.GenericError, err:
14667 raise errors.OpPrereqError("Target info for disk %s: %s" %
14668 (idx, err), errors.ECODE_INVAL)
14670 disk_info.append((host, port, magic))
14672 assert len(disk_info) == len(self.op.target_node)
14673 self.dest_disk_info = disk_info
14675 else:
14676 raise errors.ProgrammerError("Unhandled export mode %r" %
14677 self.op.mode)
14679 # instance disk type verification
14680 # TODO: Implement export support for file-based disks
14681 for disk in self.instance.disks:
14682 if disk.dev_type == constants.LD_FILE:
14683 raise errors.OpPrereqError("Export not supported for instances with"
14684 " file-based disks", errors.ECODE_INVAL)
14686 def _CleanupExports(self, feedback_fn):
14687 """Removes exports of current instance from all other nodes.
14689 If an instance in a cluster with nodes A..D was exported to node C, its
14690 exports will be removed from the nodes A, B and D.
14692 """
14693 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14695 nodelist = self.cfg.GetNodeList()
14696 nodelist.remove(self.dst_node.name)
14698 # on one-node clusters nodelist will be empty after the removal
14699 # if we proceed the backup would be removed because OpBackupQuery
14700 # substitutes an empty list with the full cluster node list.
14701 iname = self.instance.name
14702 if nodelist:
14703 feedback_fn("Removing old exports for instance %s" % iname)
14704 exportlist = self.rpc.call_export_list(nodelist)
14705 for node in exportlist:
14706 if exportlist[node].fail_msg:
14707 continue
14708 if iname in exportlist[node].payload:
14709 msg = self.rpc.call_export_remove(node, iname).fail_msg
14710 if msg:
14711 self.LogWarning("Could not remove older export for instance %s"
14712 " on node %s: %s", iname, node, msg)
14714 def Exec(self, feedback_fn):
14715 """Export an instance to an image in the cluster.
14717 """
14718 assert self.op.mode in constants.EXPORT_MODES
14720 instance = self.instance
14721 src_node = instance.primary_node
14723 if self.op.shutdown:
14724 # shutdown the instance, but not the disks
14725 feedback_fn("Shutting down instance %s" % instance.name)
14726 result = self.rpc.call_instance_shutdown(src_node, instance,
14727 self.op.shutdown_timeout)
14728 # TODO: Maybe ignore failures if ignore_remove_failures is set
14729 result.Raise("Could not shutdown instance %s on"
14730 " node %s" % (instance.name, src_node))
14732 # set the disks ID correctly since call_instance_start needs the
14733 # correct drbd minor to create the symlinks
14734 for disk in instance.disks:
14735 self.cfg.SetDiskID(disk, src_node)
14737 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14739 if activate_disks:
14740 # Activate the instance disks if we're exporting a stopped instance
14741 feedback_fn("Activating disks for %s" % instance.name)
14742 _StartInstanceDisks(self, instance, None)
14744 try:
14745 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14746 instance)
14748 helper.CreateSnapshots()
14749 try:
14750 if (self.op.shutdown and
14751 instance.admin_state == constants.ADMINST_UP and
14752 not self.op.remove_instance):
14753 assert not activate_disks
14754 feedback_fn("Starting instance %s" % instance.name)
14755 result = self.rpc.call_instance_start(src_node,
14756 (instance, None, None), False)
14757 msg = result.fail_msg
14758 if msg:
14759 feedback_fn("Failed to start instance: %s" % msg)
14760 _ShutdownInstanceDisks(self, instance)
14761 raise errors.OpExecError("Could not start instance: %s" % msg)
14763 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14764 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14765 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14766 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14767 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14769 (key_name, _, _) = self.x509_key_name
14771 dest_ca_pem = \
14772 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14773 self.dest_x509_ca)
14775 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14776 key_name, dest_ca_pem,
14777 timeouts)
14778 finally:
14779 helper.Cleanup()
14781 # Check for backwards compatibility
14782 assert len(dresults) == len(instance.disks)
14783 assert compat.all(isinstance(i, bool) for i in dresults), \
14784 "Not all results are boolean: %r" % dresults
14786 finally:
14787 if activate_disks:
14788 feedback_fn("Deactivating disks for %s" % instance.name)
14789 _ShutdownInstanceDisks(self, instance)
14791 if not (compat.all(dresults) and fin_resu):
14792 failures = []
14793 if not fin_resu:
14794 failures.append("export finalization")
14795 if not compat.all(dresults):
14796 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14797 if not dsk)
14798 failures.append("disk export: disk(s) %s" % fdsk)
14800 raise errors.OpExecError("Export failed, errors in %s" %
14801 utils.CommaJoin(failures))
14803 # At this point, the export was successful, we can cleanup/finish
14805 # Remove instance if requested
14806 if self.op.remove_instance:
14807 feedback_fn("Removing instance %s" % instance.name)
14808 _RemoveInstance(self, feedback_fn, instance,
14809 self.op.ignore_remove_failures)
14811 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14812 self._CleanupExports(feedback_fn)
14814 return fin_resu, dresults
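# The LU returns the pair (fin_resu, dresults): one boolean for export
# finalization plus one boolean per instance disk.  A hedged sketch of how a
# caller could summarize that contract (invented helper, same shapes):
def _ExampleSummarizeExport(fin_resu, dresults):
  """Renders (fin_resu, dresults) as a short status string.

  """
  if fin_resu and compat.all(dresults):
    return "export OK (%d disk(s))" % len(dresults)
  bad = [str(idx) for (idx, ok) in enumerate(dresults) if not ok]
  return ("export failed (finalized: %s, bad disks: %s)" %
          (fin_resu, ",".join(bad) or "none"))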
14817 class LUBackupRemove(NoHooksLU):
14818 """Remove exports related to the named instance.
14820 """
14821 REQ_BGL = False
14823 def ExpandNames(self):
14824 self.needed_locks = {
14825 # We need all nodes to be locked in order for RemoveExport to work, but
14826 # we don't need to lock the instance itself, as nothing will happen to it
14827 # (and we can remove exports also for a removed instance)
14828 locking.LEVEL_NODE: locking.ALL_SET,
14830 # Removing backups is quick, so blocking allocations is justified
14831 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14832 }
14834 # Allocations should be stopped while this LU runs with node locks, but it
14835 # doesn't have to be exclusive
14836 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14838 def Exec(self, feedback_fn):
14839 """Remove any export.
14841 """
14842 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14843 # If the instance was not found we'll try with the name that was passed in.
14844 # This will only work if it was an FQDN, though.
14845 fqdn_warn = False
14846 if not instance_name:
14847 fqdn_warn = True
14848 instance_name = self.op.instance_name
14850 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14851 exportlist = self.rpc.call_export_list(locked_nodes)
14852 found = False
14853 for node in exportlist:
14854 msg = exportlist[node].fail_msg
14855 if msg:
14856 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14857 continue
14858 if instance_name in exportlist[node].payload:
14859 found = True
14860 result = self.rpc.call_export_remove(node, instance_name)
14861 msg = result.fail_msg
14862 if msg:
14863 logging.error("Could not remove export for instance %s"
14864 " on node %s: %s", instance_name, node, msg)
14866 if fqdn_warn and not found:
14867 feedback_fn("Export not found. If trying to remove an export belonging"
14868 " to a deleted instance please use its Fully Qualified"
14869 " Domain Name.")
14872 class LUGroupAdd(LogicalUnit):
14873 """Logical unit for creating node groups.
14875 """
14876 HPATH = "group-add"
14877 HTYPE = constants.HTYPE_GROUP
14879 REQ_BGL = False
14880 def ExpandNames(self):
14881 # We need the new group's UUID here so that we can create and acquire the
14882 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14883 # that it should not check whether the UUID exists in the configuration.
14884 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14885 self.needed_locks = {}
14886 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14888 def CheckPrereq(self):
14889 """Check prerequisites.
14891 This checks that the given group name is not an existing node group
14892 already.
14894 """
14895 try:
14896 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14897 except errors.OpPrereqError:
14898 pass
14899 else:
14900 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14901 " node group (UUID: %s)" %
14902 (self.op.group_name, existing_uuid),
14903 errors.ECODE_EXISTS)
14905 if self.op.ndparams:
14906 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14908 if self.op.hv_state:
14909 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14910 else:
14911 self.new_hv_state = None
14913 if self.op.disk_state:
14914 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14915 else:
14916 self.new_disk_state = None
14918 if self.op.diskparams:
14919 for templ in constants.DISK_TEMPLATES:
14920 if templ in self.op.diskparams:
14921 utils.ForceDictType(self.op.diskparams[templ],
14922 constants.DISK_DT_TYPES)
14923 self.new_diskparams = self.op.diskparams
14924 try:
14925 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14926 except errors.OpPrereqError, err:
14927 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14928 errors.ECODE_INVAL)
14929 else:
14930 self.new_diskparams = {}
14932 if self.op.ipolicy:
14933 cluster = self.cfg.GetClusterInfo()
14934 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14935 try:
14936 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14937 except errors.ConfigurationError, err:
14938 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14939 errors.ECODE_INVAL)
14941 def BuildHooksEnv(self):
14942 """Build hooks env.
14944 """
14945 return {
14946 "GROUP_NAME": self.op.group_name,
14947 }
14949 def BuildHooksNodes(self):
14950 """Build hooks nodes.
14952 """
14953 mn = self.cfg.GetMasterNode()
14954 return ([mn], [mn])
14956 def Exec(self, feedback_fn):
14957 """Add the node group to the cluster.
14959 """
14960 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14961 uuid=self.group_uuid,
14962 alloc_policy=self.op.alloc_policy,
14963 ndparams=self.op.ndparams,
14964 diskparams=self.new_diskparams,
14965 ipolicy=self.op.ipolicy,
14966 hv_state_static=self.new_hv_state,
14967 disk_state_static=self.new_disk_state)
14969 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14970 del self.remove_locks[locking.LEVEL_NODEGROUP]
14973 class LUGroupAssignNodes(NoHooksLU):
14974 """Logical unit for assigning nodes to groups.
14976 """
14977 REQ_BGL = False
14979 def ExpandNames(self):
14980 # These raise errors.OpPrereqError on their own:
14981 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14982 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14984 # We want to lock all the affected nodes and groups. We have readily
14985 # available the list of nodes, and the *destination* group. To gather the
14986 # list of "source" groups, we need to fetch node information later on.
14987 self.needed_locks = {
14988 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14989 locking.LEVEL_NODE: self.op.nodes,
14990 }
14992 def DeclareLocks(self, level):
14993 if level == locking.LEVEL_NODEGROUP:
14994 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14996 # Try to get all affected nodes' groups without having the group or node
14997 # lock yet. Needs verification later in the code flow.
14998 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15000 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15002 def CheckPrereq(self):
15003 """Check prerequisites.
15005 """
15006 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15007 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15008 frozenset(self.op.nodes))
15010 expected_locks = (set([self.group_uuid]) |
15011 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15012 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15013 if actual_locks != expected_locks:
15014 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15015 " current groups are '%s', used to be '%s'" %
15016 (utils.CommaJoin(expected_locks),
15017 utils.CommaJoin(actual_locks)))
15019 self.node_data = self.cfg.GetAllNodesInfo()
15020 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15021 instance_data = self.cfg.GetAllInstancesInfo()
15023 if self.group is None:
15024 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15025 (self.op.group_name, self.group_uuid))
15027 (new_splits, previous_splits) = \
15028 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15029 for node in self.op.nodes],
15030 self.node_data, instance_data)
15032 if new_splits:
15033 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15035 if not self.op.force:
15036 raise errors.OpExecError("The following instances get split by this"
15037 " change and --force was not given: %s" %
15038 fmt_new_splits)
15039 else:
15040 self.LogWarning("This operation will split the following instances: %s",
15041 fmt_new_splits)
15043 if previous_splits:
15044 self.LogWarning("In addition, these already-split instances continue"
15045 " to be split across groups: %s",
15046 utils.CommaJoin(utils.NiceSort(previous_splits)))
15048 def Exec(self, feedback_fn):
15049 """Assign nodes to a new group.
15051 """
15052 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15054 self.cfg.AssignGroupNodes(mods)
15056 @staticmethod
15057 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15058 """Check for split instances after a node assignment.
15060 This method considers a series of node assignments as an atomic operation,
15061 and returns information about split instances after applying the set of
15062 changes.
15064 In particular, it returns information about newly split instances, and
15065 instances that were already split, and remain so after the change.
15067 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15068 considered.
15070 @type changes: list of (node_name, new_group_uuid) pairs.
15071 @param changes: list of node assignments to consider.
15072 @param node_data: a dict with data for all nodes
15073 @param instance_data: a dict with all instances to consider
15074 @rtype: a two-tuple
15075 @return: a list of instances that were previously okay and result split as a
15076 consequence of this change, and a list of instances that were previously
15077 split and this change does not fix.
15079 """
15080 changed_nodes = dict((node, group) for node, group in changes
15081 if node_data[node].group != group)
15083 all_split_instances = set()
15084 previously_split_instances = set()
15086 def InstanceNodes(instance):
15087 return [instance.primary_node] + list(instance.secondary_nodes)
15089 for inst in instance_data.values():
15090 if inst.disk_template not in constants.DTS_INT_MIRROR:
15091 continue
15093 instance_nodes = InstanceNodes(inst)
15095 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15096 previously_split_instances.add(inst.name)
15098 if len(set(changed_nodes.get(node, node_data[node].group)
15099 for node in instance_nodes)) > 1:
15100 all_split_instances.add(inst.name)
15102 return (list(all_split_instances - previously_split_instances),
15103 list(previously_split_instances & all_split_instances))
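# A self-contained illustration of the split computation above, with plain
# dicts instead of config objects (all names invented for the example):
def _ExampleSplitCheck():
  """Mimics CheckAssignmentForSplitInstances on toy data.

  """
  node_group = {"node1": "g1", "node2": "g1", "node3": "g2"}
  instance_nodes = {"inst1": ["node1", "node2"], "inst2": ["node1", "node3"]}
  changes = {"node2": "g2"}  # proposal: move node2 into group g2

  new_splits = []
  previous_splits = []
  for (iname, nodes) in sorted(instance_nodes.items()):
    was_split = len(set(node_group[n] for n in nodes)) > 1
    is_split = len(set(changes.get(n, node_group[n]) for n in nodes)) > 1
    if is_split and not was_split:
      new_splits.append(iname)        # inst1 becomes split by the change
    elif was_split and is_split:
      previous_splits.append(iname)   # inst2 was split and stays split
  return (new_splits, previous_splits)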
15106 class _GroupQuery(_QueryBase):
15107 FIELDS = query.GROUP_FIELDS
15109 def ExpandNames(self, lu):
15110 lu.needed_locks = {}
15112 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15113 self._cluster = lu.cfg.GetClusterInfo()
15114 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15116 if not self.names:
15117 self.wanted = [name_to_uuid[name]
15118 for name in utils.NiceSort(name_to_uuid.keys())]
15119 else:
15120 # Accept names to be either names or UUIDs.
15121 missing = []
15122 self.wanted = []
15123 all_uuid = frozenset(self._all_groups.keys())
15125 for name in self.names:
15126 if name in all_uuid:
15127 self.wanted.append(name)
15128 elif name in name_to_uuid:
15129 self.wanted.append(name_to_uuid[name])
15130 else:
15131 missing.append(name)
15133 if missing:
15134 raise errors.OpPrereqError("Some groups do not exist: %s" %
15135 utils.CommaJoin(missing),
15136 errors.ECODE_NOENT)
15138 def DeclareLocks(self, lu, level):
15139 pass
15141 def _GetQueryData(self, lu):
15142 """Computes the list of node groups and their attributes.
15144 """
15145 do_nodes = query.GQ_NODE in self.requested_data
15146 do_instances = query.GQ_INST in self.requested_data
15148 group_to_nodes = None
15149 group_to_instances = None
15151 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15152 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15153 # latter GetAllInstancesInfo() is not enough, for we have to go through
15154 # instance->node. Hence, we will need to process nodes even if we only need
15155 # instance information.
15156 if do_nodes or do_instances:
15157 all_nodes = lu.cfg.GetAllNodesInfo()
15158 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15159 node_to_group = {}
15161 for node in all_nodes.values():
15162 if node.group in group_to_nodes:
15163 group_to_nodes[node.group].append(node.name)
15164 node_to_group[node.name] = node.group
15166 if do_instances:
15167 all_instances = lu.cfg.GetAllInstancesInfo()
15168 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15170 for instance in all_instances.values():
15171 node = instance.primary_node
15172 if node in node_to_group:
15173 group_to_instances[node_to_group[node]].append(instance.name)
15175 if not do_nodes:
15176 # Do not pass on node information if it was not requested.
15177 group_to_nodes = None
15179 return query.GroupQueryData(self._cluster,
15180 [self._all_groups[uuid]
15181 for uuid in self.wanted],
15182 group_to_nodes, group_to_instances,
15183 query.GQ_DISKPARAMS in self.requested_data)
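# Instances are attributed to a group through their primary node, which is
# why the node pass above runs even when only instance data was requested.
# The same two-step mapping in miniature (plain dicts, invented data):
def _ExampleGroupToInstances(node_group, inst_primary, wanted_groups):
  """Maps group UUID to instance names via each instance's primary node.

  """
  mapping = dict((group, []) for group in wanted_groups)
  for (inst, pnode) in sorted(inst_primary.items()):
    group = node_group.get(pnode)
    if group in mapping:
      mapping[group].append(inst)
  return mapping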
15186 class LUGroupQuery(NoHooksLU):
15187 """Logical unit for querying node groups.
15189 """
15190 REQ_BGL = False
15192 def CheckArguments(self):
15193 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15194 self.op.output_fields, False)
15196 def ExpandNames(self):
15197 self.gq.ExpandNames(self)
15199 def DeclareLocks(self, level):
15200 self.gq.DeclareLocks(self, level)
15202 def Exec(self, feedback_fn):
15203 return self.gq.OldStyleQuery(self)
15206 class LUGroupSetParams(LogicalUnit):
15207 """Modifies the parameters of a node group.
15209 """
15210 HPATH = "group-modify"
15211 HTYPE = constants.HTYPE_GROUP
15213 REQ_BGL = False
15214 def CheckArguments(self):
15215 all_changes = [
15216 self.op.ndparams,
15217 self.op.diskparams,
15218 self.op.alloc_policy,
15219 self.op.hv_state,
15220 self.op.disk_state,
15221 self.op.ipolicy,
15222 ]
15224 if all_changes.count(None) == len(all_changes):
15225 raise errors.OpPrereqError("Please pass at least one modification",
15226 errors.ECODE_INVAL)
15228 def ExpandNames(self):
15229 # This raises errors.OpPrereqError on its own:
15230 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15232 self.needed_locks = {
15233 locking.LEVEL_INSTANCE: [],
15234 locking.LEVEL_NODEGROUP: [self.group_uuid],
15235 }
15237 self.share_locks[locking.LEVEL_INSTANCE] = 1
15239 def DeclareLocks(self, level):
15240 if level == locking.LEVEL_INSTANCE:
15241 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15243 # Lock instances optimistically, needs verification once group lock has
15244 # been acquired
15245 self.needed_locks[locking.LEVEL_INSTANCE] = \
15246 self.cfg.GetNodeGroupInstances(self.group_uuid)
15248 @staticmethod
15249 def _UpdateAndVerifyDiskParams(old, new):
15250 """Updates and verifies disk parameters.
15252 """
15253 new_params = _GetUpdatedParams(old, new)
15254 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15255 return new_params
15257 def CheckPrereq(self):
15258 """Check prerequisites.
15260 """
15261 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15263 # Check if locked instances are still correct
15264 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15266 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15267 cluster = self.cfg.GetClusterInfo()
15269 if self.group is None:
15270 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15271 (self.op.group_name, self.group_uuid))
15273 if self.op.ndparams:
15274 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15275 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15276 self.new_ndparams = new_ndparams
15278 if self.op.diskparams:
15279 diskparams = self.group.diskparams
15280 uavdp = self._UpdateAndVerifyDiskParams
15281 # For each disktemplate subdict update and verify the values
15282 new_diskparams = dict((dt,
15283 uavdp(diskparams.get(dt, {}),
15284 self.op.diskparams[dt]))
15285 for dt in constants.DISK_TEMPLATES
15286 if dt in self.op.diskparams)
15287 # As we have all subdicts of diskparams ready, let's merge the actual
15288 # dict with all updated subdicts
15289 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15290 try:
15291 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15292 except errors.OpPrereqError, err:
15293 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15294 errors.ECODE_INVAL)
15296 if self.op.hv_state:
15297 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15298 self.group.hv_state_static)
15300 if self.op.disk_state:
15301 self.new_disk_state = \
15302 _MergeAndVerifyDiskState(self.op.disk_state,
15303 self.group.disk_state_static)
15305 if self.op.ipolicy:
15306 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15307 self.op.ipolicy,
15308 group_policy=True)
15310 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15311 inst_filter = lambda inst: inst.name in owned_instances
15312 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15313 gmi = ganeti.masterd.instance
15314 violations = \
15315 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15316 self.group),
15317 new_ipolicy, instances)
15319 if violations:
15320 self.LogWarning("After the ipolicy change the following instances"
15321 " violate them: %s",
15322 utils.CommaJoin(violations))
15324 def BuildHooksEnv(self):
15325 """Build hooks env.
15327 """
15328 return {
15329 "GROUP_NAME": self.op.group_name,
15330 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15331 }
15333 def BuildHooksNodes(self):
15334 """Build hooks nodes.
15336 """
15337 mn = self.cfg.GetMasterNode()
15338 return ([mn], [mn])
15340 def Exec(self, feedback_fn):
15341 """Modifies the node group.
15343 """
15344 result = []
15346 if self.op.ndparams:
15347 self.group.ndparams = self.new_ndparams
15348 result.append(("ndparams", str(self.group.ndparams)))
15350 if self.op.diskparams:
15351 self.group.diskparams = self.new_diskparams
15352 result.append(("diskparams", str(self.group.diskparams)))
15354 if self.op.alloc_policy:
15355 self.group.alloc_policy = self.op.alloc_policy
15357 if self.op.hv_state:
15358 self.group.hv_state_static = self.new_hv_state
15360 if self.op.disk_state:
15361 self.group.disk_state_static = self.new_disk_state
15363 if self.op.ipolicy:
15364 self.group.ipolicy = self.new_ipolicy
15366 self.cfg.Update(self.group, feedback_fn)
15368 return result
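# The parameter handling above is a two-level merge: each per-template
# subdict is updated against the group's current values, then the result is
# laid over the existing diskparams dict.  A hedged sketch of that shallow
# merge semantics (the real objects.FillDict/_GetUpdatedParams also handle
# special values such as constants.VALUE_DEFAULT, omitted here):
def _ExampleFillDict(defaults, custom):
  """Returns a copy of "defaults" with "custom" keys laid over it.

  """
  merged = defaults.copy()
  merged.update(custom)
  return merged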
15370 class LUGroupRemove(LogicalUnit):
15371 HPATH = "group-remove"
15372 HTYPE = constants.HTYPE_GROUP
15374 REQ_BGL = False
15375 def ExpandNames(self):
15376 # This will raise errors.OpPrereqError on its own:
15377 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15378 self.needed_locks = {
15379 locking.LEVEL_NODEGROUP: [self.group_uuid],
15380 }
15382 def CheckPrereq(self):
15383 """Check prerequisites.
15385 This checks that the given group name exists as a node group, that is
15386 empty (i.e., contains no nodes), and that is not the last group of the
15387 cluster.
15389 """
15390 # Verify that the group is empty.
15391 group_nodes = [node.name
15392 for node in self.cfg.GetAllNodesInfo().values()
15393 if node.group == self.group_uuid]
15395 if group_nodes:
15396 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15397 " nodes: %s" %
15398 (self.op.group_name,
15399 utils.CommaJoin(utils.NiceSort(group_nodes))),
15400 errors.ECODE_STATE)
15402 # Verify the cluster would not be left group-less.
15403 if len(self.cfg.GetNodeGroupList()) == 1:
15404 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15405 " removed" % self.op.group_name,
15406 errors.ECODE_STATE)
15408 def BuildHooksEnv(self):
15409 """Build hooks env.
15411 """
15412 return {
15413 "GROUP_NAME": self.op.group_name,
15414 }
15416 def BuildHooksNodes(self):
15417 """Build hooks nodes.
15419 """
15420 mn = self.cfg.GetMasterNode()
15421 return ([mn], [mn])
15423 def Exec(self, feedback_fn):
15424 """Remove the node group.
15426 """
15427 try:
15428 self.cfg.RemoveNodeGroup(self.group_uuid)
15429 except errors.ConfigurationError:
15430 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15431 (self.op.group_name, self.group_uuid))
15433 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15436 class LUGroupRename(LogicalUnit):
15437 HPATH = "group-rename"
15438 HTYPE = constants.HTYPE_GROUP
15440 REQ_BGL = False
15441 def ExpandNames(self):
15442 # This raises errors.OpPrereqError on its own:
15443 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15445 self.needed_locks = {
15446 locking.LEVEL_NODEGROUP: [self.group_uuid],
15447 }
15449 def CheckPrereq(self):
15450 """Check prerequisites.
15452 Ensures requested new name is not yet used.
15454 """
15455 try:
15456 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15457 except errors.OpPrereqError:
15458 pass
15459 else:
15460 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15461 " node group (UUID: %s)" %
15462 (self.op.new_name, new_name_uuid),
15463 errors.ECODE_EXISTS)
15465 def BuildHooksEnv(self):
15466 """Build hooks env.
15468 """
15469 return {
15470 "OLD_NAME": self.op.group_name,
15471 "NEW_NAME": self.op.new_name,
15472 }
15474 def BuildHooksNodes(self):
15475 """Build hooks nodes.
15477 """
15478 mn = self.cfg.GetMasterNode()
15480 all_nodes = self.cfg.GetAllNodesInfo()
15481 all_nodes.pop(mn, None)
15483 run_nodes = [mn]
15484 run_nodes.extend(node.name for node in all_nodes.values()
15485 if node.group == self.group_uuid)
15487 return (run_nodes, run_nodes)
15489 def Exec(self, feedback_fn):
15490 """Rename the node group.
15492 """
15493 group = self.cfg.GetNodeGroup(self.group_uuid)
15495 if group is None:
15496 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15497 (self.op.group_name, self.group_uuid))
15499 group.name = self.op.new_name
15500 self.cfg.Update(group, feedback_fn)
15502 return self.op.new_name
15505 class LUGroupEvacuate(LogicalUnit):
15506 HPATH = "group-evacuate"
15507 HTYPE = constants.HTYPE_GROUP
15509 REQ_BGL = False
15510 def ExpandNames(self):
15511 # This raises errors.OpPrereqError on its own:
15512 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15514 if self.op.target_groups:
15515 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15516 self.op.target_groups)
15517 else:
15518 self.req_target_uuids = []
15520 if self.group_uuid in self.req_target_uuids:
15521 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15522 " as a target group (targets are %s)" %
15523 (self.group_uuid,
15524 utils.CommaJoin(self.req_target_uuids)),
15525 errors.ECODE_INVAL)
15527 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15529 self.share_locks = _ShareAll()
15530 self.needed_locks = {
15531 locking.LEVEL_INSTANCE: [],
15532 locking.LEVEL_NODEGROUP: [],
15533 locking.LEVEL_NODE: [],
15534 }
15536 def DeclareLocks(self, level):
15537 if level == locking.LEVEL_INSTANCE:
15538 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15540 # Lock instances optimistically, needs verification once node and group
15541 # locks have been acquired
15542 self.needed_locks[locking.LEVEL_INSTANCE] = \
15543 self.cfg.GetNodeGroupInstances(self.group_uuid)
15545 elif level == locking.LEVEL_NODEGROUP:
15546 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15548 if self.req_target_uuids:
15549 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15551 # Lock all groups used by instances optimistically; this requires going
15552 # via the node before it's locked, requiring verification later on
15553 lock_groups.update(group_uuid
15554 for instance_name in
15555 self.owned_locks(locking.LEVEL_INSTANCE)
15556 for group_uuid in
15557 self.cfg.GetInstanceNodeGroups(instance_name))
15558 else:
15559 # No target groups, need to lock all of them
15560 lock_groups = locking.ALL_SET
15562 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15564 elif level == locking.LEVEL_NODE:
15565 # This will only lock the nodes in the group to be evacuated which
15566 # contain actual instances
15567 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15568 self._LockInstancesNodes()
15570 # Lock all nodes in group to be evacuated and target groups
15571 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15572 assert self.group_uuid in owned_groups
15573 member_nodes = [node_name
15574 for group in owned_groups
15575 for node_name in self.cfg.GetNodeGroup(group).members]
15576 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15578 def CheckPrereq(self):
15579 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15580 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15581 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15583 assert owned_groups.issuperset(self.req_target_uuids)
15584 assert self.group_uuid in owned_groups
15586 # Check if locked instances are still correct
15587 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15589 # Get instance information
15590 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15592 # Check if node groups for locked instances are still correct
15593 _CheckInstancesNodeGroups(self.cfg, self.instances,
15594 owned_groups, owned_nodes, self.group_uuid)
15596 if self.req_target_uuids:
15597 # User requested specific target groups
15598 self.target_uuids = self.req_target_uuids
15599 else:
15600 # All groups except the one to be evacuated are potential targets
15601 self.target_uuids = [group_uuid for group_uuid in owned_groups
15602 if group_uuid != self.group_uuid]
15604 if not self.target_uuids:
15605 raise errors.OpPrereqError("There are no possible target groups",
15606 errors.ECODE_INVAL)
15608 def BuildHooksEnv(self):
15609 """Build hooks env.
15611 """
15612 return {
15613 "GROUP_NAME": self.op.group_name,
15614 "TARGET_GROUPS": " ".join(self.target_uuids),
15615 }
15617 def BuildHooksNodes(self):
15618 """Build hooks nodes.
15620 """
15621 mn = self.cfg.GetMasterNode()
15623 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15625 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15627 return (run_nodes, run_nodes)
15629 def Exec(self, feedback_fn):
15630 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15632 assert self.group_uuid not in self.target_uuids
15634 req = iallocator.IAReqGroupChange(instances=instances,
15635 target_groups=self.target_uuids)
15636 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15638 ial.Run(self.op.iallocator)
15640 if not ial.success:
15641 raise errors.OpPrereqError("Can't compute group evacuation using"
15642 " iallocator '%s': %s" %
15643 (self.op.iallocator, ial.info),
15644 errors.ECODE_NORES)
15646 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15648 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15649 len(jobs), self.op.group_name)
15651 return ResultWithJobs(jobs)
15654 class TagsLU(NoHooksLU): # pylint: disable=W0223
15655 """Generic tags LU.
15657 This is an abstract class which is the parent of all the other tags LUs.
15659 """
15660 def ExpandNames(self):
15661 self.group_uuid = None
15662 self.needed_locks = {}
15664 if self.op.kind == constants.TAG_NODE:
15665 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15666 lock_level = locking.LEVEL_NODE
15667 lock_name = self.op.name
15668 elif self.op.kind == constants.TAG_INSTANCE:
15669 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15670 lock_level = locking.LEVEL_INSTANCE
15671 lock_name = self.op.name
15672 elif self.op.kind == constants.TAG_NODEGROUP:
15673 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15674 lock_level = locking.LEVEL_NODEGROUP
15675 lock_name = self.group_uuid
15676 elif self.op.kind == constants.TAG_NETWORK:
15677 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15678 lock_level = locking.LEVEL_NETWORK
15679 lock_name = self.network_uuid
15680 else:
15681 lock_level = None
15682 lock_name = None
15684 if lock_level and getattr(self.op, "use_locking", True):
15685 self.needed_locks[lock_level] = lock_name
15687 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15688 # not possible to acquire the BGL based on opcode parameters)
15690 def CheckPrereq(self):
15691 """Check prerequisites.
15693 """
15694 if self.op.kind == constants.TAG_CLUSTER:
15695 self.target = self.cfg.GetClusterInfo()
15696 elif self.op.kind == constants.TAG_NODE:
15697 self.target = self.cfg.GetNodeInfo(self.op.name)
15698 elif self.op.kind == constants.TAG_INSTANCE:
15699 self.target = self.cfg.GetInstanceInfo(self.op.name)
15700 elif self.op.kind == constants.TAG_NODEGROUP:
15701 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15702 elif self.op.kind == constants.TAG_NETWORK:
15703 self.target = self.cfg.GetNetwork(self.network_uuid)
15705 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15706 str(self.op.kind), errors.ECODE_INVAL)
15709 class LUTagsGet(TagsLU):
15710 """Returns the tags of a given object.
15712 """
15713 REQ_BGL = False
15715 def ExpandNames(self):
15716 TagsLU.ExpandNames(self)
15718 # Share locks as this is only a read operation
15719 self.share_locks = _ShareAll()
15721 def Exec(self, feedback_fn):
15722 """Returns the tag list.
15724 """
15725 return list(self.target.GetTags())
15728 class LUTagsSearch(NoHooksLU):
15729 """Searches the tags for a given pattern.
15731 """
15732 REQ_BGL = False
15734 def ExpandNames(self):
15735 self.needed_locks = {}
15737 def CheckPrereq(self):
15738 """Check prerequisites.
15740 This checks the pattern passed for validity by compiling it.
15742 """
15743 try:
15744 self.re = re.compile(self.op.pattern)
15745 except re.error, err:
15746 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15747 (self.op.pattern, err), errors.ECODE_INVAL)
15749 def Exec(self, feedback_fn):
15750 """Returns the tag list.
15752 """
15753 cfg = self.cfg
15754 tgts = [("/cluster", cfg.GetClusterInfo())]
15755 ilist = cfg.GetAllInstancesInfo().values()
15756 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15757 nlist = cfg.GetAllNodesInfo().values()
15758 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15759 tgts.extend(("/nodegroup/%s" % n.name, n)
15760 for n in cfg.GetAllNodeGroupsInfo().values())
15761 results = []
15762 for path, target in tgts:
15763 for tag in target.GetTags():
15764 if self.re.search(tag):
15765 results.append((path, tag))
15766 return results
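# The search walks every taggable object under a pseudo-path, so a pattern
# like "^db-" could yield [("/instances/inst1.example.com", "db-primary")].
# A minimal standalone version of the same walk (invented data and helper):
def _ExampleSearchTags(pattern, tags_by_path):
  """Returns (path, tag) pairs whose tag matches the given pattern.

  """
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in sorted(tags_by_path.items())
          for tag in sorted(tags)
          if rx.search(tag)]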
15769 class LUTagsSet(TagsLU):
15770 """Sets a tag on a given object.
15772 """
15773 REQ_BGL = False
15775 def CheckPrereq(self):
15776 """Check prerequisites.
15778 This checks the type and length of the tag name and value.
15780 """
15781 TagsLU.CheckPrereq(self)
15782 for tag in self.op.tags:
15783 objects.TaggableObject.ValidateTag(tag)
15785 def Exec(self, feedback_fn):
15786 """Sets the tag.
15788 """
15789 try:
15790 for tag in self.op.tags:
15791 self.target.AddTag(tag)
15792 except errors.TagError, err:
15793 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15794 self.cfg.Update(self.target, feedback_fn)
15797 class LUTagsDel(TagsLU):
15798 """Delete a list of tags from a given object.
15800 """
15801 REQ_BGL = False
15803 def CheckPrereq(self):
15804 """Check prerequisites.
15806 This checks that we have the given tag.
15808 """
15809 TagsLU.CheckPrereq(self)
15810 for tag in self.op.tags:
15811 objects.TaggableObject.ValidateTag(tag)
15812 del_tags = frozenset(self.op.tags)
15813 cur_tags = self.target.GetTags()
15815 diff_tags = del_tags - cur_tags
15816 if diff_tags:
15817 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15818 raise errors.OpPrereqError("Tag(s) %s not found" %
15819 (utils.CommaJoin(diff_names), ),
15820 errors.ECODE_NOENT)
15822 def Exec(self, feedback_fn):
15823 """Remove the tag from the object.
15825 """
15826 for tag in self.op.tags:
15827 self.target.RemoveTag(tag)
15828 self.cfg.Update(self.target, feedback_fn)
15831 class LUTestDelay(NoHooksLU):
15832 """Sleep for a specified amount of time.
15834 This LU sleeps on the master and/or nodes for a specified amount of
15835 time.
15837 """
15838 REQ_BGL = False
15840 def ExpandNames(self):
15841 """Expand names and set required locks.
15843 This expands the node list, if any.
15845 """
15846 self.needed_locks = {}
15847 if self.op.on_nodes:
15848 # _GetWantedNodes can be used here, but is not always appropriate to use
15849 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15850 # more information.
15851 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15852 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15854 def _TestDelay(self):
15855 """Do the actual sleep.
15857 """
15858 if self.op.on_master:
15859 if not utils.TestDelay(self.op.duration):
15860 raise errors.OpExecError("Error during master delay test")
15861 if self.op.on_nodes:
15862 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15863 for node, node_result in result.items():
15864 node_result.Raise("Failure during rpc call to node %s" % node)
15866 def Exec(self, feedback_fn):
15867 """Execute the test delay opcode, with the wanted repetitions.
15869 """
15870 if self.op.repeat == 0:
15871 self._TestDelay()
15872 else:
15873 top_value = self.op.repeat - 1
15874 for i in range(self.op.repeat):
15875 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15876 self._TestDelay()
15879 class LURestrictedCommand(NoHooksLU):
15880 """Logical unit for executing restricted commands.
15882 """
15883 REQ_BGL = False
15885 def ExpandNames(self):
15886 if self.op.nodes:
15887 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15889 self.needed_locks = {
15890 locking.LEVEL_NODE: self.op.nodes,
15891 }
15892 self.share_locks = {
15893 locking.LEVEL_NODE: not self.op.use_locking,
15894 }
15896 def CheckPrereq(self):
15897 """Check prerequisites.
15899 """
15901 def Exec(self, feedback_fn):
15902 """Execute restricted command and return output.
15904 """
15905 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15907 # Check if correct locks are held
15908 assert set(self.op.nodes).issubset(owned_nodes)
15910 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15912 result = []
15914 for node_name in self.op.nodes:
15915 nres = rpcres[node_name]
15916 if nres.fail_msg:
15917 msg = ("Command '%s' on node '%s' failed: %s" %
15918 (self.op.command, node_name, nres.fail_msg))
15919 result.append((False, msg))
15920 else:
15921 result.append((True, nres.payload))
15923 return result
15926 class LUTestJqueue(NoHooksLU):
15927 """Utility LU to test some aspects of the job queue.
15929 """
15930 REQ_BGL = False
15932 # Must be lower than default timeout for WaitForJobChange to see whether it
15933 # notices changed jobs
15934 _CLIENT_CONNECT_TIMEOUT = 20.0
15935 _CLIENT_CONFIRM_TIMEOUT = 60.0
15937 @classmethod
15938 def _NotifyUsingSocket(cls, cb, errcls):
15939 """Opens a Unix socket and waits for another program to connect.
15941 @type cb: callable
15942 @param cb: Callback to send socket name to client
15943 @type errcls: class
15944 @param errcls: Exception class to use for errors
15946 """
15947 # Using a temporary directory as there's no easy way to create temporary
15948 # sockets without writing a custom loop around tempfile.mktemp and
15949 # socket.bind
15950 tmpdir = tempfile.mkdtemp()
15951 try:
15952 tmpsock = utils.PathJoin(tmpdir, "sock")
15954 logging.debug("Creating temporary socket at %s", tmpsock)
15955 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15956 try:
15957 sock.bind(tmpsock)
15958 sock.listen(1)
15960 # Send details to client
15961 cb(tmpsock)
15963 # Wait for client to connect before continuing
15964 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15965 try:
15966 (conn, _) = sock.accept()
15967 except socket.error, err:
15968 raise errcls("Client didn't connect in time (%s)" % err)
15969 finally:
15970 sock.close()
15971 finally:
15972 # Remove as soon as client is connected
15973 shutil.rmtree(tmpdir)
15975 # Wait for client to close
15976 try:
15977 try:
15978 # pylint: disable=E1101
15979 # Instance of '_socketobject' has no ... member
15980 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15981 conn.recv(1)
15982 except socket.error, err:
15983 raise errcls("Client failed to confirm notification (%s)" % err)
15984 finally:
15985 conn.close()
15987 def _SendNotification(self, test, arg, sockname):
15988 """Sends a notification to the client.
15990 @type test: string
15991 @param test: Test name
15992 @param arg: Test argument (depends on test)
15993 @type sockname: string
15994 @param sockname: Socket path
15996 """
15997 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15999 def _Notify(self, prereq, test, arg):
16000 """Notifies the client of a test.
16002 @type prereq: bool
16003 @param prereq: Whether this is a prereq-phase test
16004 @type test: string
16005 @param test: Test name
16006 @param arg: Test argument (depends on test)
16008 """
16009 if prereq:
16010 errcls = errors.OpPrereqError
16011 else:
16012 errcls = errors.OpExecError
16014 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16015 test, arg),
16016 errcls)
16018 def CheckArguments(self):
16019 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16020 self.expandnames_calls = 0
16022 def ExpandNames(self):
16023 checkargs_calls = getattr(self, "checkargs_calls", 0)
16024 if checkargs_calls < 1:
16025 raise errors.ProgrammerError("CheckArguments was not called")
16027 self.expandnames_calls += 1
16029 if self.op.notify_waitlock:
16030 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16032 self.LogInfo("Expanding names")
16034 # Get lock on master node (just to get a lock, not for a particular reason)
16035 self.needed_locks = {
16036 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16037 }
16039 def Exec(self, feedback_fn):
16040 if self.expandnames_calls < 1:
16041 raise errors.ProgrammerError("ExpandNames was not called")
16043 if self.op.notify_exec:
16044 self._Notify(False, constants.JQT_EXEC, None)
16046 self.LogInfo("Executing")
16048 if self.op.log_messages:
16049 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16050 for idx, msg in enumerate(self.op.log_messages):
16051 self.LogInfo("Sending log message %s", idx + 1)
16052 feedback_fn(constants.JQT_MSGPREFIX + msg)
16053 # Report how many test messages have been sent
16054 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16056 if self.op.fail:
16057 raise errors.OpExecError("Opcode failure was requested")
16059 return True
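# The notifications above publish a Unix socket path which the test client
# must connect to within _CLIENT_CONNECT_TIMEOUT and then confirm by closing
# (or writing on) its end, which unblocks conn.recv(1).  A hedged sketch of
# the client side of that handshake (standalone helper, not a Ganeti tool):
def _ExampleConfirmNotification(sockname):
  """Connects to the LU's notification socket and confirms receipt.

  """
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    client.connect(sockname)  # unblocks sock.accept() in the LU
  finally:
    client.close()            # makes conn.recv(1) return immediately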
16062 class LUTestAllocator(NoHooksLU):
16063 """Run allocator tests.
16065 This LU runs the allocator tests
16067 """
16068 def CheckPrereq(self):
16069 """Check prerequisites.
16071 This checks the opcode parameters depending on the direction and mode test.
16073 """
16074 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16075 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16076 for attr in ["memory", "disks", "disk_template",
16077 "os", "tags", "nics", "vcpus"]:
16078 if not hasattr(self.op, attr):
16079 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16080 attr, errors.ECODE_INVAL)
16081 iname = self.cfg.ExpandInstanceName(self.op.name)
16082 if iname is not None:
16083 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16084 iname, errors.ECODE_EXISTS)
16085 if not isinstance(self.op.nics, list):
16086 raise errors.OpPrereqError("Invalid parameter 'nics'",
16087 errors.ECODE_INVAL)
16088 if not isinstance(self.op.disks, list):
16089 raise errors.OpPrereqError("Invalid parameter 'disks'",
16090 errors.ECODE_INVAL)
16091 for row in self.op.disks:
16092 if (not isinstance(row, dict) or
16093 constants.IDISK_SIZE not in row or
16094 not isinstance(row[constants.IDISK_SIZE], int) or
16095 constants.IDISK_MODE not in row or
16096 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16097 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16098 " parameter", errors.ECODE_INVAL)
16099 if self.op.hypervisor is None:
16100 self.op.hypervisor = self.cfg.GetHypervisorType()
16101 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16102 fname = _ExpandInstanceName(self.cfg, self.op.name)
16103 self.op.name = fname
16104 self.relocate_from = \
16105 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16106 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16107 constants.IALLOCATOR_MODE_NODE_EVAC):
16108 if not self.op.instances:
16109 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16110 self.op.instances = _GetWantedInstances(self, self.op.instances)
16111 else:
16112 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16113 self.op.mode, errors.ECODE_INVAL)
16115 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16116 if self.op.iallocator is None:
16117 raise errors.OpPrereqError("Missing allocator name",
16118 errors.ECODE_INVAL)
16119 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16120 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16121 self.op.direction, errors.ECODE_INVAL)
16123 def Exec(self, feedback_fn):
16124 """Run the allocator test.
16126 """
16127 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16128 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16129 memory=self.op.memory,
16130 disks=self.op.disks,
16131 disk_template=self.op.disk_template,
16132 os=self.op.os,
16133 tags=self.op.tags,
16134 nics=self.op.nics,
16135 vcpus=self.op.vcpus,
16136 spindle_use=self.op.spindle_use,
16137 hypervisor=self.op.hypervisor)
16138 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16139 req = iallocator.IAReqRelocate(name=self.op.name,
16140 relocate_from=list(self.relocate_from))
16141 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16142 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16143 target_groups=self.op.target_groups)
16144 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16145 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16146 evac_mode=self.op.evac_mode)
16147 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16148 disk_template = self.op.disk_template
16149 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16150 memory=self.op.memory,
16151 disks=self.op.disks,
16152 disk_template=disk_template,
16153 os=self.op.os,
16154 tags=self.op.tags,
16155 nics=self.op.nics,
16156 vcpus=self.op.vcpus,
16157 spindle_use=self.op.spindle_use,
16158 hypervisor=self.op.hypervisor)
16159 for idx in range(self.op.count)]
16160 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16161 else:
16162 raise errors.ProgrammerError("Uncaught mode %s in"
16163 " LUTestAllocator.Exec", self.op.mode)
16165 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16166 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16167 result = ial.in_text
16168 else:
16169 ial.Run(self.op.iallocator, validate=False)
16170 result = ial.out_text
16172 return result
16174 class LUNetworkAdd(LogicalUnit):
16175 """Logical unit for creating networks.
16177 """
16178 HPATH = "network-add"
16179 HTYPE = constants.HTYPE_NETWORK
16181 REQ_BGL = False
16182 def BuildHooksNodes(self):
16183 """Build hooks nodes.
16185 """
16186 mn = self.cfg.GetMasterNode()
16187 return ([mn], [mn])
16189 def CheckArguments(self):
16190 if self.op.mac_prefix:
16191 self.op.mac_prefix = \
16192 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16194 def ExpandNames(self):
16195 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16197 if self.op.conflicts_check:
16198 self.share_locks[locking.LEVEL_NODE] = 1
16199 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16200 self.needed_locks = {
16201 locking.LEVEL_NODE: locking.ALL_SET,
16202 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16203 }
16204 else:
16205 self.needed_locks = {}
16207 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16209 def CheckPrereq(self):
16210 if self.op.network is None:
16211 raise errors.OpPrereqError("Network must be given",
16212 errors.ECODE_INVAL)
16214 try:
16215 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16216 except errors.OpPrereqError:
16217 pass
16218 else:
16219 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16220 " network (UUID: %s)" %
16221 (self.op.network_name, existing_uuid),
16222 errors.ECODE_EXISTS)
16224 # Check tag validity
16225 for tag in self.op.tags:
16226 objects.TaggableObject.ValidateTag(tag)
16228 def BuildHooksEnv(self):
16229 """Build hooks env.
16231 """
16232 args = {
16233 "name": self.op.network_name,
16234 "subnet": self.op.network,
16235 "gateway": self.op.gateway,
16236 "network6": self.op.network6,
16237 "gateway6": self.op.gateway6,
16238 "mac_prefix": self.op.mac_prefix,
16239 "network_type": self.op.network_type,
16240 "tags": self.op.tags,
16241 }
16242 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16244 def Exec(self, feedback_fn):
16245 """Add the ip pool to the cluster.
16247 """
16248 nobj = objects.Network(name=self.op.network_name,
16249 network=self.op.network,
16250 gateway=self.op.gateway,
16251 network6=self.op.network6,
16252 gateway6=self.op.gateway6,
16253 mac_prefix=self.op.mac_prefix,
16254 network_type=self.op.network_type,
16255 uuid=self.network_uuid,
16256 family=constants.IP4_VERSION)
16257 # Initialize the associated address pool
16258 try:
16259 pool = network.AddressPool.InitializeNetwork(nobj)
16260 except errors.AddressPoolError, e:
16261 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16263 # Check if we need to reserve the nodes and the cluster master IP
16264 # These may not be allocated to any instances in routed mode, as
16265 # they wouldn't function anyway.
16266 if self.op.conflicts_check:
16267 for node in self.cfg.GetAllNodesInfo().values():
16268 for ip in [node.primary_ip, node.secondary_ip]:
16269 try:
16270 if pool.Contains(ip):
16271 pool.Reserve(ip)
16272 self.LogInfo("Reserved IP address of node '%s' (%s)",
16273 node.name, ip)
16274 except errors.AddressPoolError:
16275 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16276 node.name, ip)
16278 master_ip = self.cfg.GetClusterInfo().master_ip
16279 try:
16280 if pool.Contains(master_ip):
16281 pool.Reserve(master_ip)
16282 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16283 except errors.AddressPoolError:
16284 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16285 master_ip)
16287 if self.op.add_reserved_ips:
16288 for ip in self.op.add_reserved_ips:
16289 try:
16290 pool.Reserve(ip, external=True)
16291 except errors.AddressPoolError, e:
16292 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16294 if self.op.tags:
16295 for tag in self.op.tags:
16296 nobj.AddTag(tag)
16298 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16299 del self.remove_locks[locking.LEVEL_NETWORK]
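# The address pool above decides which addresses inside the subnet may still
# be handed out; reserving the node and master IPs keeps them from ever
# being allocated to instances.  A rough standalone model of those semantics
# (a set of strings instead of the real bitarray-backed network.AddressPool;
# this toy class is invented for illustration):
class _ExampleAddressPool(object):
  """Toy reservation pool with Contains/Reserve semantics.

  """
  def __init__(self, addresses):
    self._all = frozenset(addresses)
    self._reserved = set()

  def Contains(self, address):
    return address in self._all

  def Reserve(self, address, external=False):
    if address not in self._all:
      raise errors.AddressPoolError("%s not in this network" % address)
    if address in self._reserved:
      raise errors.AddressPoolError("%s already reserved" % address)
    self._reserved.add(address)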
16302 class LUNetworkRemove(LogicalUnit):
16303 HPATH = "network-remove"
16304 HTYPE = constants.HTYPE_NETWORK
16306 REQ_BGL = False
16307 def ExpandNames(self):
16308 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16310 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16311 self.needed_locks = {
16312 locking.LEVEL_NETWORK: [self.network_uuid],
16313 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16316 def CheckPrereq(self):
16317 """Check prerequisites.
16319 This checks that the given network name exists as a network and that it
16320 is not connected to any node group.
16322 """
16324 # Verify that the network is not connected.
16325 node_groups = [group.name
16326 for group in self.cfg.GetAllNodeGroupsInfo().values()
16327 if self.network_uuid in group.networks]
16329 if node_groups:
16330 self.LogWarning("Network '%s' is connected to the following"
16331 " node groups: %s" %
16332 (self.op.network_name,
16333 utils.CommaJoin(utils.NiceSort(node_groups))))
16334 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16336 def BuildHooksEnv(self):
16337 """Build hooks env.
16339 """
16340 return {
16341 "NETWORK_NAME": self.op.network_name,
16342 }
16344 def BuildHooksNodes(self):
16345 """Build hooks nodes.
16347 """
16348 mn = self.cfg.GetMasterNode()
16349 return ([mn], [mn])
16351 def Exec(self, feedback_fn):
16352 """Remove the network.
16354 """
16355 try:
16356 self.cfg.RemoveNetwork(self.network_uuid)
16357 except errors.ConfigurationError:
16358 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16359 (self.op.network_name, self.network_uuid))
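

# A minimal sketch of the matching client invocation, assuming the standard
# CLI; CheckPrereq above requires the network to be disconnected from all
# node groups first:
#
#   $ gnt-network remove example-net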


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved IPs"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.network_type = self.network.network_type
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.network_type:
      if self.op.network_type == constants.VALUE_NONE:
        self.network_type = None
      else:
        self.network_type = self.op.network_type

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "network_type": self.network_type,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    # TODO: reserve/release via temporary reservation manager
    #       extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve the gateway's IP address")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    if self.op.network_type:
      self.network.network_type = self.network_type

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
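

# Illustrative sketch: reserved addresses are modified via OpNetworkSetParams.
# Field names mirror the self.op attributes used above; note that
# CheckArguments rejects changing the gateway and the reserved IPs in the
# same opcode.
#
#   op = opcodes.OpNetworkSetParams(network_name="example-net",
#                                   add_reserved_ips=["192.0.2.20"],
#                                   remove_reserved_ips=["192.0.2.10"])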


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])
            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
              break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
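

# Assumed shape of a _GetStats() result for a small subnet (values are
# illustrative; the exact "map" encoding is defined by
# network.AddressPool.GetMap):
#
#   {
#     "free_count": 11,
#     "reserved_count": 5,
#     "map": "XXX.....X..X....",
#     "external_reservations": "192.0.2.0, 192.0.2.1, 192.0.2.15",
#   }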


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
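

# Illustrative sketch: the same data is reachable through the query opcode;
# the field names given here are assumptions based on query.NETWORK_FIELDS.
#
#   op = opcodes.OpNetworkQuery(names=[],
#                               output_fields=["name", "network", "gateway"])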


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a node group.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs verification once the group
      # lock has been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
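

# Illustrative sketch: connecting a network supplies the NIC parameters that
# are stored per node group (self.netparams above). Field names mirror the
# self.op attributes used by this LU.
#
#   op = opcodes.OpNetworkConnect(network_name="example-net",
#                                 group_name="default",
#                                 network_mode=constants.NIC_MODE_BRIDGED,
#                                 network_link="br0",
#                                 conflicts_check=True)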


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
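

# Worked example: for details = [(0, "192.0.2.10"), (2, "192.0.2.12")],
# _FmtNetworkConflict returns "nic0/192.0.2.10, nic2/192.0.2.12", which
# _NetworkConflictCheck joins per instance into its warning message.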


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a node group.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs verification once the group
      # lock has been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False

    if self.connected:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                            "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)
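

# Illustrative sketch, mirroring the connect example above:
#
#   op = opcodes.OpNetworkDisconnect(network_name="example-net",
#                                    group_name="default")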


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
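

# Usage example (derived from the mapping above):
# _GetQueryImplementation(constants.QR_NETWORK) returns the _NetworkQuery
# class, while an unknown resource name raises OpPrereqError with
# errors.ECODE_INVAL.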


def _CheckForConflictingIp(lu, ip, node):
  """Raises an error in case of a conflicting IP address.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
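

# Usage example: _CheckForConflictingIp(self, "192.0.2.10",
# "node1.example.com") raises OpPrereqError if that address belongs to a
# network already connected to node1's node group, and returns (None, None)
# otherwise ("node1.example.com" is a hypothetical node name).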