4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
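# Illustrative sketch only (not part of the original module): an LU's Exec
# method can hand follow-up jobs back to the job processor, here reusing the
# existing OpTestDelay opcode purely as a placeholder:
#
#   def Exec(self, feedback_fn):
#     ...
#     return ResultWithJobs([[opcodes.OpTestDelay(duration=1.0)]],
#                           extra_result="done")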
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left as a purely lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods need not worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
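A typical implementation (a sketch only, mirroring the pattern documented in
L{_LockInstancesNodes}) looks like::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()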
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. No nodes should be returned as an
323 empty list (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused argument and the
350 # "could be a function" warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called from DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check whether we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
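# Usage sketch (assumption: a hypothetical LU operating on one instance and
# its nodes); ExpandNames requests an empty node level and DeclareLocks fills
# it in through the helper above:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()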
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see: L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
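# Behaviour sketch with made-up values: VALUE_DEFAULT removes a key when
# use_default is True, and None removes it when use_none is True.
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"b": constants.VALUE_DEFAULT, "c": 3})
#   -> {"a": 1, "c": 3}
#   _GetUpdatedParams({"a": 1}, {"a": None}, use_none=True)
#   -> {}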
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of a instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
831 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
833 use_default=use_default)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster'" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
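# Usage sketch (names are illustrative): keep only the node locks we still
# need and release everything else acquired at that level.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])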
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024 """Make sure that none of the given paramters is global.
1026 If a global parameter is found, an L{errors.OpPrereqError} exception is
1027 raised. This is used to avoid setting global parameters for individual nodes.
1029 @type params: dictionary
1030 @param params: Parameters to check
1031 @type glob_pars: dictionary
1032 @param glob_pars: Forbidden parameters
1034 @param kind: Kind of parameters (e.g. "node")
1035 @type bad_levels: string
1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1038 @type good_levels: string
1039 @param good_levels: Level(s) at which the parameters are allowed (e.g.
1043 used_globals = glob_pars.intersection(params)
1045 msg = ("The following %s parameters are global and cannot"
1046 " be customized at %s level, please modify them at"
1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1052 def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online.
1055 @param lu: the LU on behalf of which we make the check
1056 @param node: the node to check
1057 @param msg: if passed, should be a message to replace the default one
1058 @raise errors.OpPrereqError: if the node is offline
1062 msg = "Can't use offline node"
1063 if lu.cfg.GetNodeInfo(node).offline:
1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1067 def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained.
1070 @param lu: the LU on behalf of which we make the check
1071 @param node: the node to check
1072 @raise errors.OpPrereqError: if the node is drained
1075 if lu.cfg.GetNodeInfo(node).drained:
1076 raise errors.OpPrereqError("Can't use drained node %s" % node,
1080 def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable.
1083 @param lu: the LU on behalf of which we make the check
1084 @param node: the node to check
1085 @raise errors.OpPrereqError: if the node is not vm capable
1088 if not lu.cfg.GetNodeInfo(node).vm_capable:
1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1093 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS.
1096 @param lu: the LU on behalf of which we make the check
1097 @param node: the node to check
1098 @param os_name: the OS to query about
1099 @param force_variant: whether to ignore variant errors
1100 @raise errors.OpPrereqError: if the node does not support the OS
1103 result = lu.rpc.call_os_get(node, os_name)
1104 result.Raise("OS '%s' not in supported OS list for node %s" %
1106 prereq=True, ecode=errors.ECODE_INVAL)
1107 if not force_variant:
1108 _CheckOSVariant(result.payload, os_name)
1111 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip.
1114 @type lu: L{LogicalUnit}
1115 @param lu: the LU on behalf of which we make the check
1117 @param node: the node to check
1118 @type secondary_ip: string
1119 @param secondary_ip: the ip to check
1120 @type prereq: boolean
1121 @param prereq: whether to throw a prerequisite or an execute error
1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1127 result.Raise("Failure checking secondary ip on node %s" % node,
1128 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1129 if not result.payload:
1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1131 " please fix and re-run this command" % secondary_ip)
1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1135 raise errors.OpExecError(msg)
1138 def _CheckNodePVs(nresult, exclusive_storage):
1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1143 if pvlist_dict is None:
1144 return (["Can't get PV list from node"], None)
1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1147 # check that ':' is not present in PV names, since it's a
1148 # special character for lvcreate (denotes the range of PEs to
1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1153 (pv.name, pv.vg_name))
1155 if exclusive_storage:
1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1157 errlist.extend(errmsgs)
1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1160 for (pvname, lvlist) in shared_pvs:
1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1162 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1163 (pvname, utils.CommaJoin(lvlist)))
1164 return (errlist, es_pvinfo)
1167 def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret.
1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1175 def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states.
1178 @param lu: the LU on behalf of which we make the check
1179 @param instance: the instance to check
1180 @param msg: if passed, should be a message to replace the default one
1181 @raise errors.OpPrereqError: if the instance is not in the required state
1185 msg = ("can't use instance from outside %s states" %
1186 utils.CommaJoin(req_states))
1187 if instance.admin_state not in req_states:
1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1189 (instance.name, instance.admin_state, msg),
1192 if constants.ADMINST_UP not in req_states:
1193 pnode = instance.primary_node
1194 if not lu.cfg.GetNodeInfo(pnode).offline:
1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1196 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1197 prereq=True, ecode=errors.ECODE_ENVIRON)
1198 if instance.name in ins_l.payload:
1199 raise errors.OpPrereqError("Instance %s is running, %s" %
1200 (instance.name, msg), errors.ECODE_STATE)
1202 lu.LogWarning("Primary node offline, ignoring check that instance"
1206 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range.
1209 @param name: name of the parameter for which we perform the check
1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1212 @param ipolicy: dictionary containing min, max and std values
1213 @param value: actual value that we want to use
1214 @return: None or element not meeting the criteria
1218 if value in [None, constants.VALUE_AUTO]:
1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1222 if value > max_v or min_v > value:
1224 fqn = "%s/%s" % (name, qualifier)
1227 return ("%s value %s is not in range [%s, %s]" %
1228 (fqn, value, min_v, max_v))
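# Example (assuming an ipolicy whose memory-size bounds are 128..4096 MB):
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
#   -> "memory-size value 8192 is not in range [128, 4096]"
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 512)
#   -> None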
1232 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1233 nic_count, disk_sizes, spindle_use,
1234 _compute_fn=_ComputeMinMaxSpec):
1235 """Verifies ipolicy against provided specs.
1238 @param ipolicy: The ipolicy
1240 @param mem_size: The memory size
1241 @type cpu_count: int
1242 @param cpu_count: Used cpu cores
1243 @type disk_count: int
1244 @param disk_count: Number of disks used
1245 @type nic_count: int
1246 @param nic_count: Number of nics used
1247 @type disk_sizes: list of ints
1248 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1249 @type spindle_use: int
1250 @param spindle_use: The number of spindles this instance uses
1251 @param _compute_fn: The compute function (unittest only)
1252 @return: A list of violations, or an empty list if no violations are found
1255 assert disk_count == len(disk_sizes)
1258 (constants.ISPEC_MEM_SIZE, "", mem_size),
1259 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1260 (constants.ISPEC_DISK_COUNT, "", disk_count),
1261 (constants.ISPEC_NIC_COUNT, "", nic_count),
1262 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1263 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1264 for idx, d in enumerate(disk_sizes)]
1267 (_compute_fn(name, qualifier, ipolicy, value)
1268 for (name, qualifier, value) in test_settings))
1271 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1272 _compute_fn=_ComputeIPolicySpecViolation):
1273 """Compute if instance meets the specs of ipolicy.
1276 @param ipolicy: The ipolicy to verify against
1277 @type instance: L{objects.Instance}
1278 @param instance: The instance to verify
1279 @param _compute_fn: The function to verify ipolicy (unittest only)
1280 @see: L{_ComputeIPolicySpecViolation}
1283 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1284 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1285 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1286 disk_count = len(instance.disks)
1287 disk_sizes = [disk.size for disk in instance.disks]
1288 nic_count = len(instance.nics)
1290 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1291 disk_sizes, spindle_use)
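# Usage sketch: collect policy violations for an existing instance object
# against a group's ipolicy (names are illustrative).
#
#   res = _ComputeIPolicyInstanceViolation(group_ipolicy, instance_obj)
#   if res:
#     raise errors.OpPrereqError("Instance violates policy: %s" %
#                                utils.CommaJoin(res), errors.ECODE_INVAL)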
1294 def _ComputeIPolicyInstanceSpecViolation(
1295 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1296 """Compute if instance specs meets the specs of ipolicy.
1299 @param ipolicy: The ipolicy to verify against
1300 @type instance_spec: dict
1301 @param instance_spec: The instance spec to verify
1302 @param _compute_fn: The function to verify ipolicy (unittest only)
1303 @see: L{_ComputeIPolicySpecViolation}
1306 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1307 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1308 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1309 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1310 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1311 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1313 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1314 disk_sizes, spindle_use)
1317 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1319 _compute_fn=_ComputeIPolicyInstanceViolation):
1320 """Compute if instance meets the specs of the new target group.
1322 @param ipolicy: The ipolicy to verify
1323 @param instance: The instance object to verify
1324 @param current_group: The current group of the instance
1325 @param target_group: The new group of the instance
1326 @param _compute_fn: The function to verify ipolicy (unittest only)
1327 @see: L{_ComputeIPolicySpecViolation}
1330 if current_group == target_group:
1333 return _compute_fn(ipolicy, instance)
1336 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1337 _compute_fn=_ComputeIPolicyNodeViolation):
1338 """Checks that the target node is correct in terms of instance policy.
1340 @param ipolicy: The ipolicy to verify
1341 @param instance: The instance object to verify
1342 @param node: The new node to relocate
1343 @param ignore: Ignore violations of the ipolicy
1344 @param _compute_fn: The function to verify ipolicy (unittest only)
1345 @see: L{_ComputeIPolicySpecViolation}
1348 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1349 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1352 msg = ("Instance does not meet target node group's (%s) instance"
1353 " policy: %s") % (node.group, utils.CommaJoin(res))
1357 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1360 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1361 """Computes a set of any instances that would violate the new ipolicy.
1363 @param old_ipolicy: The current (still in-place) ipolicy
1364 @param new_ipolicy: The new (to become) ipolicy
1365 @param instances: List of instances to verify
1366 @return: A list of instances which violate the new ipolicy but
1370 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1371 _ComputeViolatingInstances(old_ipolicy, instances))
1374 def _ExpandItemName(fn, name, kind):
1375 """Expand an item name.
1377 @param fn: the function to use for expansion
1378 @param name: requested item name
1379 @param kind: text description ('Node' or 'Instance')
1380 @return: the resolved (full) name
1381 @raise errors.OpPrereqError: if the item is not found
1384 full_name = fn(name)
1385 if full_name is None:
1386 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1391 def _ExpandNodeName(cfg, name):
1392 """Wrapper over L{_ExpandItemName} for nodes."""
1393 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1396 def _ExpandInstanceName(cfg, name):
1397 """Wrapper over L{_ExpandItemName} for instance."""
1398 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1401 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1403 """Builds network related env variables for hooks
1405 This builds the hook environment from individual variables.
1408 @param name: the name of the network
1409 @type subnet: string
1410 @param subnet: the ipv4 subnet
1411 @type gateway: string
1412 @param gateway: the ipv4 gateway
1413 @type network6: string
1414 @param network6: the ipv6 subnet
1415 @type gateway6: string
1416 @param gateway6: the ipv6 gateway
1417 @type mac_prefix: string
1418 @param mac_prefix: the mac_prefix
1420 @param tags: the tags of the network
1425 env["NETWORK_NAME"] = name
1427 env["NETWORK_SUBNET"] = subnet
1429 env["NETWORK_GATEWAY"] = gateway
1431 env["NETWORK_SUBNET6"] = network6
1433 env["NETWORK_GATEWAY6"] = gateway6
1435 env["NETWORK_MAC_PREFIX"] = mac_prefix
1437 env["NETWORK_TAGS"] = " ".join(tags)
1442 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1443 minmem, maxmem, vcpus, nics, disk_template, disks,
1444 bep, hvp, hypervisor_name, tags):
1445 """Builds instance related env variables for hooks
1447 This builds the hook environment from individual variables.
1450 @param name: the name of the instance
1451 @type primary_node: string
1452 @param primary_node: the name of the instance's primary node
1453 @type secondary_nodes: list
1454 @param secondary_nodes: list of secondary nodes as strings
1455 @type os_type: string
1456 @param os_type: the name of the instance's OS
1457 @type status: string
1458 @param status: the desired status of the instance
1459 @type minmem: string
1460 @param minmem: the minimum memory size of the instance
1461 @type maxmem: string
1462 @param maxmem: the maximum memory size of the instance
1464 @param vcpus: the count of VCPUs the instance has
1466 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1467 the NICs the instance has
1468 @type disk_template: string
1469 @param disk_template: the disk template of the instance
1471 @param disks: the list of (size, mode) pairs
1473 @param bep: the backend parameters for the instance
1475 @param hvp: the hypervisor parameters for the instance
1476 @type hypervisor_name: string
1477 @param hypervisor_name: the hypervisor for the instance
1479 @param tags: list of instance tags as strings
1481 @return: the hook environment for this instance
1486 "INSTANCE_NAME": name,
1487 "INSTANCE_PRIMARY": primary_node,
1488 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1489 "INSTANCE_OS_TYPE": os_type,
1490 "INSTANCE_STATUS": status,
1491 "INSTANCE_MINMEM": minmem,
1492 "INSTANCE_MAXMEM": maxmem,
1493 # TODO(2.7) remove deprecated "memory" value
1494 "INSTANCE_MEMORY": maxmem,
1495 "INSTANCE_VCPUS": vcpus,
1496 "INSTANCE_DISK_TEMPLATE": disk_template,
1497 "INSTANCE_HYPERVISOR": hypervisor_name,
1500 nic_count = len(nics)
1501 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1504 env["INSTANCE_NIC%d_IP" % idx] = ip
1505 env["INSTANCE_NIC%d_MAC" % idx] = mac
1506 env["INSTANCE_NIC%d_MODE" % idx] = mode
1507 env["INSTANCE_NIC%d_LINK" % idx] = link
1509 nobj = objects.Network.FromDict(netinfo)
1510 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1512 # FIXME: broken network reference: the instance NIC specifies a
1513 # network, but the relevant network entry was not in the config. This
1514 # should be made impossible.
1515 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1516 if mode == constants.NIC_MODE_BRIDGED:
1517 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1521 env["INSTANCE_NIC_COUNT"] = nic_count
1524 disk_count = len(disks)
1525 for idx, (size, mode) in enumerate(disks):
1526 env["INSTANCE_DISK%d_SIZE" % idx] = size
1527 env["INSTANCE_DISK%d_MODE" % idx] = mode
1531 env["INSTANCE_DISK_COUNT"] = disk_count
1536 env["INSTANCE_TAGS"] = " ".join(tags)
1538 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1539 for key, value in source.items():
1540 env["INSTANCE_%s_%s" % (kind, key)] = value
1545 def _NICToTuple(lu, nic):
1546 """Build a tupple of nic information.
1548 @type lu: L{LogicalUnit}
1549 @param lu: the logical unit on whose behalf we execute
1550 @type nic: L{objects.NIC}
1551 @param nic: nic to convert to hooks tuple
1554 cluster = lu.cfg.GetClusterInfo()
1555 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1556 mode = filled_params[constants.NIC_MODE]
1557 link = filled_params[constants.NIC_LINK]
1560 nobj = lu.cfg.GetNetwork(nic.network)
1561 netinfo = objects.Network.ToDict(nobj)
1562 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1565 def _NICListToTuple(lu, nics):
1566 """Build a list of nic information tuples.
1568 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1569 value in LUInstanceQueryData.
1571 @type lu: L{LogicalUnit}
1572 @param lu: the logical unit on whose behalf we execute
1573 @type nics: list of L{objects.NIC}
1574 @param nics: list of nics to convert to hooks tuples
1579 hooks_nics.append(_NICToTuple(lu, nic))
1583 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1584 """Builds instance related env variables for hooks from an object.
1586 @type lu: L{LogicalUnit}
1587 @param lu: the logical unit on whose behalf we execute
1588 @type instance: L{objects.Instance}
1589 @param instance: the instance for which we should build the
1591 @type override: dict
1592 @param override: dictionary with key/values that will override
1595 @return: the hook environment dictionary
1598 cluster = lu.cfg.GetClusterInfo()
1599 bep = cluster.FillBE(instance)
1600 hvp = cluster.FillHV(instance)
1602 "name": instance.name,
1603 "primary_node": instance.primary_node,
1604 "secondary_nodes": instance.secondary_nodes,
1605 "os_type": instance.os,
1606 "status": instance.admin_state,
1607 "maxmem": bep[constants.BE_MAXMEM],
1608 "minmem": bep[constants.BE_MINMEM],
1609 "vcpus": bep[constants.BE_VCPUS],
1610 "nics": _NICListToTuple(lu, instance.nics),
1611 "disk_template": instance.disk_template,
1612 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1615 "hypervisor_name": instance.hypervisor,
1616 "tags": instance.tags,
1619 args.update(override)
1620 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1623 def _AdjustCandidatePool(lu, exceptions):
1624 """Adjust the candidate pool after node operations.
1627 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1629 lu.LogInfo("Promoted nodes to master candidate role: %s",
1630 utils.CommaJoin(node.name for node in mod_list))
1631 for name in mod_list:
1632 lu.context.ReaddNode(name)
1633 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1635 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1639 def _DecideSelfPromotion(lu, exceptions=None):
1640 """Decide whether I should promote myself as a master candidate.
1643 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1644 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1645 # the new node will increase mc_max with one, so:
1646 mc_should = min(mc_should + 1, cp_size)
1647 return mc_now < mc_should
1650 def _ComputeViolatingInstances(ipolicy, instances):
1651 """Computes a set of instances who violates given ipolicy.
1653 @param ipolicy: The ipolicy to verify
1654 @type instances: list of L{objects.Instance}
1655 @param instances: List of instances to verify
1656 @return: A frozenset of instance names violating the ipolicy
1659 return frozenset([inst.name for inst in instances
1660 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1663 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1664 """Check that the brigdes needed by a list of nics exist.
1667 cluster = lu.cfg.GetClusterInfo()
1668 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1669 brlist = [params[constants.NIC_LINK] for params in paramslist
1670 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1672 result = lu.rpc.call_bridges_exist(target_node, brlist)
1673 result.Raise("Error checking bridges on destination node '%s'" %
1674 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1677 def _CheckInstanceBridgesExist(lu, instance, node=None):
1678 """Check that the brigdes needed by an instance exist.
1682 node = instance.primary_node
1683 _CheckNicsBridgesExist(lu, instance.nics, node)
1686 def _CheckOSVariant(os_obj, name):
1687 """Check whether an OS name conforms to the os variants specification.
1689 @type os_obj: L{objects.OS}
1690 @param os_obj: OS object to check
1692 @param name: OS name passed by the user, to check for validity
1695 variant = objects.OS.GetVariant(name)
1696 if not os_obj.supported_variants:
1698 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1699 " passed)" % (os_obj.name, variant),
1703 raise errors.OpPrereqError("OS name must include a variant",
1706 if variant not in os_obj.supported_variants:
1707 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1710 def _GetNodeInstancesInner(cfg, fn):
1711 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1714 def _GetNodeInstances(cfg, node_name):
1715 """Returns a list of all primary and secondary instances on a node.
1719 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1722 def _GetNodePrimaryInstances(cfg, node_name):
1723 """Returns primary instances on a node.
1726 return _GetNodeInstancesInner(cfg,
1727 lambda inst: node_name == inst.primary_node)
1730 def _GetNodeSecondaryInstances(cfg, node_name):
1731 """Returns secondary instances on a node.
1734 return _GetNodeInstancesInner(cfg,
1735 lambda inst: node_name in inst.secondary_nodes)
1738 def _GetStorageTypeArgs(cfg, storage_type):
1739 """Returns the arguments for a storage type.
1742 # Special case for file storage
1743 if storage_type == constants.ST_FILE:
1744 # storage.FileStorage wants a list of storage directories
1745 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1750 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1753 for dev in instance.disks:
1754 cfg.SetDiskID(dev, node_name)
1756 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1758 result.Raise("Failed to get disk status from node %s" % node_name,
1759 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1761 for idx, bdev_status in enumerate(result.payload):
1762 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1768 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1769 """Check the sanity of iallocator and node arguments and use the
1770 cluster-wide iallocator if appropriate.
1772 Check that at most one of (iallocator, node) is specified. If none is
1773 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1774 then the LU's opcode's iallocator slot is filled with the cluster-wide
1777 @type iallocator_slot: string
1778 @param iallocator_slot: the name of the opcode iallocator slot
1779 @type node_slot: string
1780 @param node_slot: the name of the opcode target node slot
1783 node = getattr(lu.op, node_slot, None)
1784 ialloc = getattr(lu.op, iallocator_slot, None)
1788 if node is not None and ialloc is not None:
1789 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1791 elif ((node is None and ialloc is None) or
1792 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1793 default_iallocator = lu.cfg.GetDefaultIAllocator()
1794 if default_iallocator:
1795 setattr(lu.op, iallocator_slot, default_iallocator)
1797 raise errors.OpPrereqError("No iallocator or node given and no"
1798 " cluster-wide default iallocator found;"
1799 " please specify either an iallocator or a"
1800 " node, or set a cluster-wide default"
1801 " iallocator", errors.ECODE_INVAL)
1804 def _GetDefaultIAllocator(cfg, ialloc):
1805 """Decides on which iallocator to use.
1807 @type cfg: L{config.ConfigWriter}
1808 @param cfg: Cluster configuration object
1809 @type ialloc: string or None
1810 @param ialloc: Iallocator specified in opcode
1812 @return: Iallocator name
1816 # Use default iallocator
1817 ialloc = cfg.GetDefaultIAllocator()
1820 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1821 " opcode nor as a cluster-wide default",
1827 def _CheckHostnameSane(lu, name):
1828 """Ensures that a given hostname resolves to a 'sane' name.
1830 The given name is required to be a prefix of the resolved hostname,
1831 to prevent accidental mismatches.
1833 @param lu: the logical unit on behalf of which we're checking
1834 @param name: the name we should resolve and check
1835 @return: the resolved hostname object
1838 hostname = netutils.GetHostname(name=name)
1839 if hostname.name != name:
1840 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1841 if not utils.MatchNameComponent(name, [hostname.name]):
1842 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1843 " same as given hostname '%s'") %
1844 (hostname.name, name), errors.ECODE_INVAL)
1848 class LUClusterPostInit(LogicalUnit):
1849 """Logical unit for running hooks after cluster initialization.
1852 HPATH = "cluster-init"
1853 HTYPE = constants.HTYPE_CLUSTER
1855 def BuildHooksEnv(self):
1860 "OP_TARGET": self.cfg.GetClusterName(),
1863 def BuildHooksNodes(self):
1864 """Build hooks nodes.
1867 return ([], [self.cfg.GetMasterNode()])
1869 def Exec(self, feedback_fn):
1876 class LUClusterDestroy(LogicalUnit):
1877 """Logical unit for destroying the cluster.
1880 HPATH = "cluster-destroy"
1881 HTYPE = constants.HTYPE_CLUSTER
1883 def BuildHooksEnv(self):
1888 "OP_TARGET": self.cfg.GetClusterName(),
1891 def BuildHooksNodes(self):
1892 """Build hooks nodes.
1897 def CheckPrereq(self):
1898 """Check prerequisites.
1900 This checks whether the cluster is empty.
1902 Any errors are signaled by raising errors.OpPrereqError.
1905 master = self.cfg.GetMasterNode()
1907 nodelist = self.cfg.GetNodeList()
1908 if len(nodelist) != 1 or nodelist[0] != master:
1909 raise errors.OpPrereqError("There are still %d node(s) in"
1910 " this cluster." % (len(nodelist) - 1),
1912 instancelist = self.cfg.GetInstanceList()
1914 raise errors.OpPrereqError("There are still %d instance(s) in"
1915 " this cluster." % len(instancelist),
1918 def Exec(self, feedback_fn):
1919 """Destroys the cluster.
1922 master_params = self.cfg.GetMasterNetworkParameters()
1924 # Run post hooks on master node before it's removed
1925 _RunPostHook(self, master_params.name)
1927 ems = self.cfg.GetUseExternalMipScript()
1928 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1931 self.LogWarning("Error disabling the master IP address: %s",
1934 return master_params.name
1937 def _VerifyCertificate(filename):
1938 """Verifies a certificate for L{LUClusterVerifyConfig}.
1940 @type filename: string
1941 @param filename: Path to PEM file
1945 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1946 utils.ReadFile(filename))
1947 except Exception, err: # pylint: disable=W0703
1948 return (LUClusterVerifyConfig.ETYPE_ERROR,
1949 "Failed to load X509 certificate %s: %s" % (filename, err))
1952 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1953 constants.SSL_CERT_EXPIRATION_ERROR)
1956 fnamemsg = "While verifying %s: %s" % (filename, msg)
1961 return (None, fnamemsg)
1962 elif errcode == utils.CERT_WARNING:
1963 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1964 elif errcode == utils.CERT_ERROR:
1965 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1967 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
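# Return value sketch (illustrative; the message texts are made up, only the
# (error-type, message) tuple shape follows from the code above):
#   (None, "While verifying /path/cert.pem: ...")              # acceptable
#   (LUClusterVerifyConfig.ETYPE_WARNING, "While verifying ...: expires soon")
#   (LUClusterVerifyConfig.ETYPE_ERROR, "Failed to load X509 certificate ...")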
1970 def _GetAllHypervisorParameters(cluster, instances):
1971 """Compute the set of all hypervisor parameters.
1973 @type cluster: L{objects.Cluster}
1974 @param cluster: the cluster object
1975 @type instances: list of L{objects.Instance}
1976 @param instances: additional instances from which to obtain parameters
1977 @rtype: list of (origin, hypervisor, parameters)
1978 @return: a list with all parameters found, indicating the hypervisor they
1979 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1984 for hv_name in cluster.enabled_hypervisors:
1985 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1987 for os_name, os_hvp in cluster.os_hvp.items():
1988 for hv_name, hv_params in os_hvp.items():
1990 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1991 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1993 # TODO: collapse identical parameter values in a single one
1994 for instance in instances:
1995 if instance.hvparams:
1996 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1997 cluster.FillHV(instance)))
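# Shape of the collected data (illustrative, hypothetical names/values): a flat
# list of (origin, hypervisor, parameters) tuples, e.g.:
#   hvp_data = [("cluster", "kvm", {...cluster defaults...}),
#               ("os debian-image", "kvm", {...defaults + OS overrides...}),
#               ("instance inst1.example.com", "kvm", {...fully filled...})]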
2002 class _VerifyErrors(object):
2003 """Mix-in for cluster/group verify LUs.
2005 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2006 self.op and self._feedback_fn to be available.)
2010 ETYPE_FIELD = "code"
2011 ETYPE_ERROR = "ERROR"
2012 ETYPE_WARNING = "WARNING"
2014 def _Error(self, ecode, item, msg, *args, **kwargs):
2015 """Format an error message.
2017 Based on the opcode's error_codes parameter, either format a
2018 parseable error code, or a simpler error string.
2020 This must be called only from Exec and functions called from Exec.
2023 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2024 itype, etxt, _ = ecode
2025 # If the error code is in the list of ignored errors, demote the error to a warning
2027 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2028 ltype = self.ETYPE_WARNING
2029 # first complete the msg
2032 # then format the whole message
2033 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2034 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2040 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2041 # and finally report it via the feedback_fn
2042 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2043 # do not mark the operation as failed for WARN cases only
2044 if ltype == self.ETYPE_ERROR:
2047 def _ErrorIf(self, cond, *args, **kwargs):
2048 """Log an error message if the passed condition is True.
2052 or self.op.debug_simulate_errors): # pylint: disable=E1101
2053 self._Error(*args, **kwargs)
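# Message format sketch (illustrative, made-up values): with op.error_codes the
# output is machine-parseable, otherwise human-readable, and both are sent
# through self._feedback_fn with a leading " - ":
#   "ERROR:ENODEN1:node:node2.example.com:not enough memory ..."
#   "ERROR: node node2.example.com: not enough memory ..."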
2056 class LUClusterVerify(NoHooksLU):
2057 """Submits all jobs necessary to verify the cluster.
2062 def ExpandNames(self):
2063 self.needed_locks = {}
2065 def Exec(self, feedback_fn):
2068 if self.op.group_name:
2069 groups = [self.op.group_name]
2070 depends_fn = lambda: None
2072 groups = self.cfg.GetNodeGroupList()
2074 # Verify global configuration
2076 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2079 # Always depend on global verification
2080 depends_fn = lambda: [(-len(jobs), [])]
2083 [opcodes.OpClusterVerifyGroup(group_name=group,
2084 ignore_errors=self.op.ignore_errors,
2085 depends=depends_fn())]
2086 for group in groups)
2088 # Fix up all parameters
2089 for op in itertools.chain(*jobs): # pylint: disable=W0142
2090 op.debug_simulate_errors = self.op.debug_simulate_errors
2091 op.verbose = self.op.verbose
2092 op.error_codes = self.op.error_codes
2094 op.skip_checks = self.op.skip_checks
2095 except AttributeError:
2096 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2098 return ResultWithJobs(jobs)
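# Resulting job list sketch (illustrative, hypothetical group names): one
# single-opcode job verifying the global configuration, then one job per node
# group, each carrying a relative dependency on the configuration job:
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="group1", depends=...)],
#    [OpClusterVerifyGroup(group_name="group2", depends=...)]]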
2101 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2102 """Verifies the cluster config.
2107 def _VerifyHVP(self, hvp_data):
2108 """Verifies locally the syntax of the hypervisor parameters.
2111 for item, hv_name, hv_params in hvp_data:
2112 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2115 hv_class = hypervisor.GetHypervisorClass(hv_name)
2116 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2117 hv_class.CheckParameterSyntax(hv_params)
2118 except errors.GenericError, err:
2119 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2121 def ExpandNames(self):
2122 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2123 self.share_locks = _ShareAll()
2125 def CheckPrereq(self):
2126 """Check prerequisites.
2129 # Retrieve all information
2130 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2131 self.all_node_info = self.cfg.GetAllNodesInfo()
2132 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2134 def Exec(self, feedback_fn):
2135 """Verify integrity of cluster, performing various tests on nodes.
2139 self._feedback_fn = feedback_fn
2141 feedback_fn("* Verifying cluster config")
2143 for msg in self.cfg.VerifyConfig():
2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2146 feedback_fn("* Verifying cluster certificate files")
2148 for cert_filename in pathutils.ALL_CERT_FILES:
2149 (errcode, msg) = _VerifyCertificate(cert_filename)
2150 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2152 feedback_fn("* Verifying hypervisor parameters")
2154 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2155 self.all_inst_info.values()))
2157 feedback_fn("* Verifying all nodes belong to an existing group")
2159 # We do this verification here because, should this bogus circumstance
2160 # occur, it would never be caught by VerifyGroup, which only acts on
2161 # nodes/instances reachable from existing node groups.
2163 dangling_nodes = set(node.name for node in self.all_node_info.values()
2164 if node.group not in self.all_group_info)
2166 dangling_instances = {}
2167 no_node_instances = []
2169 for inst in self.all_inst_info.values():
2170 if inst.primary_node in dangling_nodes:
2171 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2172 elif inst.primary_node not in self.all_node_info:
2173 no_node_instances.append(inst.name)
2178 utils.CommaJoin(dangling_instances.get(node.name,
2180 for node in dangling_nodes]
2182 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2184 "the following nodes (and their instances) belong to a non"
2185 " existing group: %s", utils.CommaJoin(pretty_dangling))
2187 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2189 "the following instances have a non-existing primary-node:"
2190 " %s", utils.CommaJoin(no_node_instances))
2195 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2196 """Verifies the status of a node group.
2199 HPATH = "cluster-verify"
2200 HTYPE = constants.HTYPE_CLUSTER
2203 _HOOKS_INDENT_RE = re.compile("^", re.M)
2205 class NodeImage(object):
2206 """A class representing the logical and physical status of a node.
2209 @ivar name: the node name to which this object refers
2210 @ivar volumes: a structure as returned from
2211 L{ganeti.backend.GetVolumeList} (runtime)
2212 @ivar instances: a list of running instances (runtime)
2213 @ivar pinst: list of configured primary instances (config)
2214 @ivar sinst: list of configured secondary instances (config)
2215 @ivar sbp: dictionary of {primary-node: list of instances} for all
2216 instances for which this node is secondary (config)
2217 @ivar mfree: free memory, as reported by hypervisor (runtime)
2218 @ivar dfree: free disk, as reported by the node (runtime)
2219 @ivar offline: the offline status (config)
2220 @type rpc_fail: boolean
2221 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2222 not whether the individual keys were correct) (runtime)
2223 @type lvm_fail: boolean
2224 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2225 @type hyp_fail: boolean
2226 @ivar hyp_fail: whether the RPC call didn't return the instance list
2227 @type ghost: boolean
2228 @ivar ghost: whether this is a known node or not (config)
2229 @type os_fail: boolean
2230 @ivar os_fail: whether the RPC call didn't return valid OS data
2232 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2233 @type vm_capable: boolean
2234 @ivar vm_capable: whether the node can host instances
2236 @ivar pv_min: size in MiB of the smallest PVs
2238 @ivar pv_max: size in MiB of the biggest PVs
2241 def __init__(self, offline=False, name=None, vm_capable=True):
2250 self.offline = offline
2251 self.vm_capable = vm_capable
2252 self.rpc_fail = False
2253 self.lvm_fail = False
2254 self.hyp_fail = False
2256 self.os_fail = False
2261 def ExpandNames(self):
2262 # This raises errors.OpPrereqError on its own:
2263 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2265 # Get instances in node group; this is unsafe and needs verification later
2267 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2269 self.needed_locks = {
2270 locking.LEVEL_INSTANCE: inst_names,
2271 locking.LEVEL_NODEGROUP: [self.group_uuid],
2272 locking.LEVEL_NODE: [],
2274 # This opcode is run by the watcher every five minutes and acquires all nodes
2275 # for a group. It doesn't run for a long time, so it's better to acquire
2276 # the node allocation lock as well.
2277 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2280 self.share_locks = _ShareAll()
2282 def DeclareLocks(self, level):
2283 if level == locking.LEVEL_NODE:
2284 # Get members of node group; this is unsafe and needs verification later
2285 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2287 all_inst_info = self.cfg.GetAllInstancesInfo()
2289 # In Exec(), we warn about mirrored instances that have primary and
2290 # secondary living in separate node groups. To fully verify that
2291 # volumes for these instances are healthy, we will need to do an
2292 # extra call to their secondaries. We ensure here those nodes will
2294 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2295 # Important: access only the instances whose lock is owned
2296 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2297 nodes.update(all_inst_info[inst].secondary_nodes)
2299 self.needed_locks[locking.LEVEL_NODE] = nodes
2301 def CheckPrereq(self):
2302 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2303 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2305 group_nodes = set(self.group_info.members)
2307 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2310 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312 unlocked_instances = \
2313 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2316 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2317 utils.CommaJoin(unlocked_nodes),
2320 if unlocked_instances:
2321 raise errors.OpPrereqError("Missing lock for instances: %s" %
2322 utils.CommaJoin(unlocked_instances),
2325 self.all_node_info = self.cfg.GetAllNodesInfo()
2326 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2328 self.my_node_names = utils.NiceSort(group_nodes)
2329 self.my_inst_names = utils.NiceSort(group_instances)
2331 self.my_node_info = dict((name, self.all_node_info[name])
2332 for name in self.my_node_names)
2334 self.my_inst_info = dict((name, self.all_inst_info[name])
2335 for name in self.my_inst_names)
2337 # We detect here the nodes that will need the extra RPC calls for verifying
2338 # split LV volumes; they should be locked.
2339 extra_lv_nodes = set()
2341 for inst in self.my_inst_info.values():
2342 if inst.disk_template in constants.DTS_INT_MIRROR:
2343 for nname in inst.all_nodes:
2344 if self.all_node_info[nname].group != self.group_uuid:
2345 extra_lv_nodes.add(nname)
2347 unlocked_lv_nodes = \
2348 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2350 if unlocked_lv_nodes:
2351 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2352 utils.CommaJoin(unlocked_lv_nodes),
2354 self.extra_lv_nodes = list(extra_lv_nodes)
2356 def _VerifyNode(self, ninfo, nresult):
2357 """Perform some basic validation on data returned from a node.
2359 - check the result data structure is well formed and has all the
2361 - check ganeti version
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the results from the node
2367 @return: whether overall this call was successful (and we can expect
2368 reasonable values in the response)
2372 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374 # main result, nresult should be a non-empty dict
2375 test = not nresult or not isinstance(nresult, dict)
2376 _ErrorIf(test, constants.CV_ENODERPC, node,
2377 "unable to verify node: no data returned")
2381 # compares ganeti version
2382 local_version = constants.PROTOCOL_VERSION
2383 remote_version = nresult.get("version", None)
2384 test = not (remote_version and
2385 isinstance(remote_version, (list, tuple)) and
2386 len(remote_version) == 2)
2387 _ErrorIf(test, constants.CV_ENODERPC, node,
2388 "connection to node returned invalid data")
2392 test = local_version != remote_version[0]
2393 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2394 "incompatible protocol versions: master %s,"
2395 " node %s", local_version, remote_version[0])
2399 # node seems compatible, we can actually try to look into its results
2401 # full package version
2402 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2403 constants.CV_ENODEVERSION, node,
2404 "software version mismatch: master %s, node %s",
2405 constants.RELEASE_VERSION, remote_version[1],
2406 code=self.ETYPE_WARNING)
2408 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2409 if ninfo.vm_capable and isinstance(hyp_result, dict):
2410 for hv_name, hv_result in hyp_result.iteritems():
2411 test = hv_result is not None
2412 _ErrorIf(test, constants.CV_ENODEHV, node,
2413 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2415 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2416 if ninfo.vm_capable and isinstance(hvp_result, list):
2417 for item, hv_name, hv_result in hvp_result:
2418 _ErrorIf(True, constants.CV_ENODEHV, node,
2419 "hypervisor %s parameter verify failure (source %s): %s",
2420 hv_name, item, hv_result)
2422 test = nresult.get(constants.NV_NODESETUP,
2423 ["Missing NODESETUP results"])
2424 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2429 def _VerifyNodeTime(self, ninfo, nresult,
2430 nvinfo_starttime, nvinfo_endtime):
2431 """Check the node time.
2433 @type ninfo: L{objects.Node}
2434 @param ninfo: the node to check
2435 @param nresult: the remote results for the node
2436 @param nvinfo_starttime: the start time of the RPC call
2437 @param nvinfo_endtime: the end time of the RPC call
2441 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2443 ntime = nresult.get(constants.NV_TIME, None)
2445 ntime_merged = utils.MergeTime(ntime)
2446 except (ValueError, TypeError):
2447 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2450 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2451 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2452 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2453 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2457 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2458 "Node time diverges by at least %s from master node time",
2461 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2462 """Check the node LVM results and update info for cross-node checks.
2464 @type ninfo: L{objects.Node}
2465 @param ninfo: the node to check
2466 @param nresult: the remote results for the node
2467 @param vg_name: the configured VG name
2468 @type nimg: L{NodeImage}
2469 @param nimg: node image
2476 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2478 # checks vg existence and size > 20G
2479 vglist = nresult.get(constants.NV_VGLIST, None)
2481 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2483 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2484 constants.MIN_VG_SIZE)
2485 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2488 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2490 self._Error(constants.CV_ENODELVM, node, em)
2491 if pvminmax is not None:
2492 (nimg.pv_min, nimg.pv_max) = pvminmax
2494 def _VerifyGroupLVM(self, node_image, vg_name):
2495 """Check cross-node consistency in LVM.
2497 @type node_image: dict
2498 @param node_image: info about nodes, mapping from node to names to
2499 L{NodeImage} objects
2500 @param vg_name: the configured VG name
2506 # Only exclusive storage needs this kind of check
2507 if not self._exclusive_storage:
2510 # exclusive_storage wants all PVs to have the same size (approximately);
2511 # if the smallest and the biggest ones are okay, everything is fine.
2512 # pv_min is None iff pv_max is None
2513 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2516 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2517 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2518 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2519 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2520 "PV sizes differ too much in the group; smallest (%s MB) is"
2521 " on %s, biggest (%s MB) is on %s",
2522 pvmin, minnode, pvmax, maxnode)
2524 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2525 """Check the node bridges.
2527 @type ninfo: L{objects.Node}
2528 @param ninfo: the node to check
2529 @param nresult: the remote results for the node
2530 @param bridges: the expected list of bridges
2537 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2539 missing = nresult.get(constants.NV_BRIDGES, None)
2540 test = not isinstance(missing, list)
2541 _ErrorIf(test, constants.CV_ENODENET, node,
2542 "did not return valid bridge information")
2544 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2545 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2547 def _VerifyNodeUserScripts(self, ninfo, nresult):
2548 """Check the results of user-script presence and executability on the node
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2557 test = constants.NV_USERSCRIPTS not in nresult
2558 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2559 "did not return user scripts information")
2561 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2563 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2564 "user scripts not present or not executable: %s" %
2565 utils.CommaJoin(sorted(broken_scripts)))
2567 def _VerifyNodeNetwork(self, ninfo, nresult):
2568 """Check the node network connectivity results.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2576 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2578 test = constants.NV_NODELIST not in nresult
2579 _ErrorIf(test, constants.CV_ENODESSH, node,
2580 "node hasn't returned node ssh connectivity data")
2582 if nresult[constants.NV_NODELIST]:
2583 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2584 _ErrorIf(True, constants.CV_ENODESSH, node,
2585 "ssh communication with node '%s': %s", a_node, a_msg)
2587 test = constants.NV_NODENETTEST not in nresult
2588 _ErrorIf(test, constants.CV_ENODENET, node,
2589 "node hasn't returned node tcp connectivity data")
2591 if nresult[constants.NV_NODENETTEST]:
2592 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2594 _ErrorIf(True, constants.CV_ENODENET, node,
2595 "tcp communication with node '%s': %s",
2596 anode, nresult[constants.NV_NODENETTEST][anode])
2598 test = constants.NV_MASTERIP not in nresult
2599 _ErrorIf(test, constants.CV_ENODENET, node,
2600 "node hasn't returned node master IP reachability data")
2602 if not nresult[constants.NV_MASTERIP]:
2603 if node == self.master_node:
2604 msg = "the master node cannot reach the master IP (not configured?)"
2606 msg = "cannot reach the master IP"
2607 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2609 def _VerifyInstance(self, instance, inst_config, node_image,
2611 """Verify an instance.
2613 This function checks to see if the required block devices are
2614 available on the instance's node, and that the nodes are in the correct
2618 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 pnode = inst_config.primary_node
2620 pnode_img = node_image[pnode]
2621 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2623 node_vol_should = {}
2624 inst_config.MapLVsByNode(node_vol_should)
2626 cluster = self.cfg.GetClusterInfo()
2627 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2629 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2630 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2631 code=self.ETYPE_WARNING)
2633 for node in node_vol_should:
2634 n_img = node_image[node]
2635 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2636 # ignore missing volumes on offline or broken nodes
2638 for volume in node_vol_should[node]:
2639 test = volume not in n_img.volumes
2640 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2641 "volume %s missing on node %s", volume, node)
2643 if inst_config.admin_state == constants.ADMINST_UP:
2644 test = instance not in pnode_img.instances and not pnode_img.offline
2645 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2646 "instance not running on its primary node %s",
2648 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2649 "instance is marked as running and lives on offline node %s",
2652 diskdata = [(nname, success, status, idx)
2653 for (nname, disks) in diskstatus.items()
2654 for idx, (success, status) in enumerate(disks)]
2656 for nname, success, bdev_status, idx in diskdata:
2657 # the 'ghost node' construction in Exec() ensures that we have a
2659 snode = node_image[nname]
2660 bad_snode = snode.ghost or snode.offline
2661 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2662 not success and not bad_snode,
2663 constants.CV_EINSTANCEFAULTYDISK, instance,
2664 "couldn't retrieve status for disk/%s on %s: %s",
2665 idx, nname, bdev_status)
2666 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2667 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "disk/%s on %s is faulty", idx, nname)
2671 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2672 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2673 " primary node failed", instance)
2675 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2676 constants.CV_EINSTANCELAYOUT,
2677 instance, "instance has multiple secondary nodes: %s",
2678 utils.CommaJoin(inst_config.secondary_nodes),
2679 code=self.ETYPE_WARNING)
2681 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2682 # Disk template not compatible with exclusive_storage: no instance
2683 # node should have the flag set
2684 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2685 inst_config.all_nodes)
2686 es_nodes = [n for (n, es) in es_flags.items()
2688 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2689 "instance has template %s, which is not supported on nodes"
2690 " that have exclusive storage set: %s",
2691 inst_config.disk_template, utils.CommaJoin(es_nodes))
2693 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2694 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2695 instance_groups = {}
2697 for node in instance_nodes:
2698 instance_groups.setdefault(self.all_node_info[node].group,
2702 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2703 # Sort so that we always list the primary node first.
2704 for group, nodes in sorted(instance_groups.items(),
2705 key=lambda (_, nodes): pnode in nodes,
2708 self._ErrorIf(len(instance_groups) > 1,
2709 constants.CV_EINSTANCESPLITGROUPS,
2710 instance, "instance has primary and secondary nodes in"
2711 " different groups: %s", utils.CommaJoin(pretty_list),
2712 code=self.ETYPE_WARNING)
2714 inst_nodes_offline = []
2715 for snode in inst_config.secondary_nodes:
2716 s_img = node_image[snode]
2717 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2718 snode, "instance %s, connection to secondary node failed",
2722 inst_nodes_offline.append(snode)
2724 # warn that the instance lives on offline nodes
2725 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2726 "instance has offline secondary node(s) %s",
2727 utils.CommaJoin(inst_nodes_offline))
2728 # ... or ghost/non-vm_capable nodes
2729 for node in inst_config.all_nodes:
2730 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2731 instance, "instance lives on ghost node %s", node)
2732 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2733 instance, "instance lives on non-vm_capable node %s", node)
2735 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2736 """Verify if there are any unknown volumes in the cluster.
2738 The .os, .swap and backup volumes are ignored. All other volumes are
2739 reported as unknown.
2741 @type reserved: L{ganeti.utils.FieldSet}
2742 @param reserved: a FieldSet of reserved volume names
2745 for node, n_img in node_image.items():
2746 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2747 self.all_node_info[node].group != self.group_uuid):
2748 # skip non-healthy nodes
2750 for volume in n_img.volumes:
2751 test = ((node not in node_vol_should or
2752 volume not in node_vol_should[node]) and
2753 not reserved.Matches(volume))
2754 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2755 "volume %s is unknown", volume)
2757 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2758 """Verify N+1 Memory Resilience.
2760 Check that if one single node dies we can still start all the
2761 instances it was primary for.
2764 cluster_info = self.cfg.GetClusterInfo()
2765 for node, n_img in node_image.items():
2766 # This code checks that every node which is now listed as
2767 # secondary has enough memory to host all instances it is
2768 # supposed to, should a single other node in the cluster fail.
2769 # FIXME: not ready for failover to an arbitrary node
2770 # FIXME: does not support file-backed instances
2771 # WARNING: we currently take into account down instances as well
2772 # as up ones, considering that even if they're down someone
2773 # might want to start them even in the event of a node failure.
2774 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2775 # we're skipping nodes marked offline and nodes in other groups from
2776 # the N+1 warning, since most likely we don't have good memory
2777 # information from them; we already list instances living on such
2778 # nodes, and that's enough warning
2780 #TODO(dynmem): also consider ballooning out other instances
2781 for prinode, instances in n_img.sbp.items():
2783 for instance in instances:
2784 bep = cluster_info.FillBE(instance_cfg[instance])
2785 if bep[constants.BE_AUTO_BALANCE]:
2786 needed_mem += bep[constants.BE_MINMEM]
2787 test = n_img.mfree < needed_mem
2788 self._ErrorIf(test, constants.CV_ENODEN1, node,
2789 "not enough memory to accommodate instance failovers"
2790 " should node %s fail (%dMiB needed, %dMiB available)",
2791 prinode, needed_mem, n_img.mfree)
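# Check sketch (illustrative, made-up numbers): for each prospective failover
# source node, needed_mem is the sum of BE_MINMEM over its auto-balanced
# instances; if the secondary's reported free memory is smaller, CV_ENODEN1 is
# raised, e.g. needed_mem = 2048 + 4096 > n_img.mfree = 4096 -> error.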
2794 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2795 (files_all, files_opt, files_mc, files_vm)):
2796 """Verifies file checksums collected from all nodes.
2798 @param errorif: Callback for reporting errors
2799 @param nodeinfo: List of L{objects.Node} objects
2800 @param master_node: Name of master node
2801 @param all_nvinfo: RPC results
2804 # Define functions determining which nodes to consider for a file
2807 (files_mc, lambda node: (node.master_candidate or
2808 node.name == master_node)),
2809 (files_vm, lambda node: node.vm_capable),
2812 # Build mapping from filename to list of nodes which should have the file
2814 for (files, fn) in files2nodefn:
2816 filenodes = nodeinfo
2818 filenodes = filter(fn, nodeinfo)
2819 nodefiles.update((filename,
2820 frozenset(map(operator.attrgetter("name"), filenodes)))
2821 for filename in files)
2823 assert set(nodefiles) == (files_all | files_mc | files_vm)
2825 fileinfo = dict((filename, {}) for filename in nodefiles)
2826 ignore_nodes = set()
2828 for node in nodeinfo:
2830 ignore_nodes.add(node.name)
2833 nresult = all_nvinfo[node.name]
2835 if nresult.fail_msg or not nresult.payload:
2838 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2839 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2840 for (key, value) in fingerprints.items())
2843 test = not (node_files and isinstance(node_files, dict))
2844 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2845 "Node did not return file checksum data")
2847 ignore_nodes.add(node.name)
2850 # Build per-checksum mapping from filename to nodes having it
2851 for (filename, checksum) in node_files.items():
2852 assert filename in nodefiles
2853 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2855 for (filename, checksums) in fileinfo.items():
2856 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2858 # Nodes having the file
2859 with_file = frozenset(node_name
2860 for nodes in fileinfo[filename].values()
2861 for node_name in nodes) - ignore_nodes
2863 expected_nodes = nodefiles[filename] - ignore_nodes
2865 # Nodes missing file
2866 missing_file = expected_nodes - with_file
2868 if filename in files_opt:
2870 errorif(missing_file and missing_file != expected_nodes,
2871 constants.CV_ECLUSTERFILECHECK, None,
2872 "File %s is optional, but it must exist on all or no"
2873 " nodes (not found on %s)",
2874 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2876 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is missing from node(s) %s", filename,
2878 utils.CommaJoin(utils.NiceSort(missing_file)))
2880 # Warn if a node has a file it shouldn't
2881 unexpected = with_file - expected_nodes
2883 constants.CV_ECLUSTERFILECHECK, None,
2884 "File %s should not exist on node(s) %s",
2885 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2887 # See if there are multiple versions of the file
2888 test = len(checksums) > 1
2890 variants = ["variant %s on %s" %
2891 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2892 for (idx, (checksum, nodes)) in
2893 enumerate(sorted(checksums.items()))]
2897 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2898 "File %s found with %s different checksums (%s)",
2899 filename, len(checksums), "; ".join(variants))
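# Data layout sketch (illustrative, hypothetical names): fileinfo maps each
# expected filename to the checksums seen and the nodes reporting each one:
#   fileinfo = {"/path/to/cluster/file": {"<checksum A>": set(["node1", "node2"]),
#                                         "<checksum B>": set(["node3"])}}
# More than one checksum for the same file triggers the "different checksums"
# error above.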
2901 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2903 """Verifies the node DRBD status.
2905 @type ninfo: L{objects.Node}
2906 @param ninfo: the node to check
2907 @param nresult: the remote results for the node
2908 @param instanceinfo: the dict of instances
2909 @param drbd_helper: the configured DRBD usermode helper
2910 @param drbd_map: the DRBD map as returned by
2911 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2915 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2918 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2919 test = (helper_result is None)
2920 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2921 "no drbd usermode helper returned")
2923 status, payload = helper_result
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "drbd usermode helper check unsuccessful: %s", payload)
2927 test = status and (payload != drbd_helper)
2928 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2929 "wrong drbd usermode helper: %s", payload)
2931 # compute the DRBD minors
2933 for minor, instance in drbd_map[node].items():
2934 test = instance not in instanceinfo
2935 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2936 "ghost instance '%s' in temporary DRBD map", instance)
2937 # ghost instance should not be running, but otherwise we
2938 # don't give double warnings (both ghost instance and
2939 # unallocated minor in use)
2941 node_drbd[minor] = (instance, False)
2943 instance = instanceinfo[instance]
2944 node_drbd[minor] = (instance.name,
2945 instance.admin_state == constants.ADMINST_UP)
2947 # and now check them
2948 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2949 test = not isinstance(used_minors, (tuple, list))
2950 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2951 "cannot parse drbd status file: %s", str(used_minors))
2953 # we cannot check drbd status
2956 for minor, (iname, must_exist) in node_drbd.items():
2957 test = minor not in used_minors and must_exist
2958 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2959 "drbd minor %d of instance %s is not active", minor, iname)
2960 for minor in used_minors:
2961 test = minor not in node_drbd
2962 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2963 "unallocated drbd minor %d is in use", minor)
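# Mapping sketch (illustrative, made-up values): node_drbd maps each minor
# expected on this node to (instance name, should-be-active):
#   node_drbd = {0: ("inst1.example.com", True),    # instance is ADMINST_UP
#                1: ("ghost-instance", False)}      # ghost entry, never active
# Minors reported in used_minors but absent from node_drbd are flagged as
# unallocated.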
2965 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2966 """Builds the node OS structures.
2968 @type ninfo: L{objects.Node}
2969 @param ninfo: the node to check
2970 @param nresult: the remote results for the node
2971 @param nimg: the node image object
2975 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2977 remote_os = nresult.get(constants.NV_OSLIST, None)
2978 test = (not isinstance(remote_os, list) or
2979 not compat.all(isinstance(v, list) and len(v) == 7
2980 for v in remote_os))
2982 _ErrorIf(test, constants.CV_ENODEOS, node,
2983 "node hasn't returned valid OS data")
2992 for (name, os_path, status, diagnose,
2993 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2995 if name not in os_dict:
2998 # parameters is a list of lists instead of list of tuples due to
2999 # JSON lacking a real tuple type, fix it:
3000 parameters = [tuple(v) for v in parameters]
3001 os_dict[name].append((os_path, status, diagnose,
3002 set(variants), set(parameters), set(api_ver)))
3004 nimg.oslist = os_dict
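# Structure sketch (illustrative, hypothetical values): nimg.oslist maps each
# OS name to a list of (path, status, diagnose, variants, parameters,
# api_versions) tuples, one per occurrence on the node:
#   nimg.oslist = {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                                   set(["default"]), set(), set([20]))]}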
3006 def _VerifyNodeOS(self, ninfo, nimg, base):
3007 """Verifies the node OS list.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nimg: the node image object
3012 @param base: the 'template' node we match against (e.g. from the master)
3016 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3018 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3020 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3021 for os_name, os_data in nimg.oslist.items():
3022 assert os_data, "Empty OS status for OS %s?!" % os_name
3023 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3024 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3025 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3026 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3027 "OS '%s' has multiple entries (first one shadows the rest): %s",
3028 os_name, utils.CommaJoin([v[0] for v in os_data]))
3029 # comparisons with the 'base' image
3030 test = os_name not in base.oslist
3031 _ErrorIf(test, constants.CV_ENODEOS, node,
3032 "Extra OS %s not present on reference node (%s)",
3036 assert base.oslist[os_name], "Base node has empty OS status?"
3037 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3039 # base OS is invalid, skipping
3041 for kind, a, b in [("API version", f_api, b_api),
3042 ("variants list", f_var, b_var),
3043 ("parameters", beautify_params(f_param),
3044 beautify_params(b_param))]:
3045 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3046 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3047 kind, os_name, base.name,
3048 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3050 # check any missing OSes
3051 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3052 _ErrorIf(missing, constants.CV_ENODEOS, node,
3053 "OSes present on reference node %s but missing on this node: %s",
3054 base.name, utils.CommaJoin(missing))
3056 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3057 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3059 @type ninfo: L{objects.Node}
3060 @param ninfo: the node to check
3061 @param nresult: the remote results for the node
3062 @type is_master: bool
3063 @param is_master: Whether node is the master node
3069 (constants.ENABLE_FILE_STORAGE or
3070 constants.ENABLE_SHARED_FILE_STORAGE)):
3072 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3074 # This should never happen
3075 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3076 "Node did not return forbidden file storage paths")
3078 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3079 "Found forbidden file storage paths: %s",
3080 utils.CommaJoin(fspaths))
3082 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3083 constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Node should not have returned forbidden file storage"
3087 def _VerifyOob(self, ninfo, nresult):
3088 """Verifies out of band functionality of a node.
3090 @type ninfo: L{objects.Node}
3091 @param ninfo: the node to check
3092 @param nresult: the remote results for the node
3096 # We just have to verify the paths on master and/or master candidates
3097 # as the oob helper is invoked on the master
3098 if ((ninfo.master_candidate or ninfo.master_capable) and
3099 constants.NV_OOB_PATHS in nresult):
3100 for path_result in nresult[constants.NV_OOB_PATHS]:
3101 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3103 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3104 """Verifies and updates the node volume data.
3106 This function will update a L{NodeImage}'s internal structures
3107 with data from the remote call.
3109 @type ninfo: L{objects.Node}
3110 @param ninfo: the node to check
3111 @param nresult: the remote results for the node
3112 @param nimg: the node image object
3113 @param vg_name: the configured VG name
3117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3119 nimg.lvm_fail = True
3120 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3123 elif isinstance(lvdata, basestring):
3124 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3125 utils.SafeEncode(lvdata))
3126 elif not isinstance(lvdata, dict):
3127 _ErrorIf(True, constants.CV_ENODELVM, node,
3128 "rpc call to node failed (lvlist)")
3130 nimg.volumes = lvdata
3131 nimg.lvm_fail = False
3133 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3134 """Verifies and updates the node instance list.
3136 If the listing was successful, then updates this node's instance
3137 list. Otherwise, it marks the RPC call as failed for the instance
3140 @type ninfo: L{objects.Node}
3141 @param ninfo: the node to check
3142 @param nresult: the remote results for the node
3143 @param nimg: the node image object
3146 idata = nresult.get(constants.NV_INSTANCELIST, None)
3147 test = not isinstance(idata, list)
3148 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3149 "rpc call to node failed (instancelist): %s",
3150 utils.SafeEncode(str(idata)))
3152 nimg.hyp_fail = True
3154 nimg.instances = idata
3156 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3157 """Verifies and computes a node information map
3159 @type ninfo: L{objects.Node}
3160 @param ninfo: the node to check
3161 @param nresult: the remote results for the node
3162 @param nimg: the node image object
3163 @param vg_name: the configured VG name
3167 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3169 # try to read free memory (from the hypervisor)
3170 hv_info = nresult.get(constants.NV_HVINFO, None)
3171 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3172 _ErrorIf(test, constants.CV_ENODEHV, node,
3173 "rpc call to node failed (hvinfo)")
3176 nimg.mfree = int(hv_info["memory_free"])
3177 except (ValueError, TypeError):
3178 _ErrorIf(True, constants.CV_ENODERPC, node,
3179 "node returned invalid nodeinfo, check hypervisor")
3181 # FIXME: devise a free space model for file based instances as well
3182 if vg_name is not None:
3183 test = (constants.NV_VGLIST not in nresult or
3184 vg_name not in nresult[constants.NV_VGLIST])
3185 _ErrorIf(test, constants.CV_ENODELVM, node,
3186 "node didn't return data for the volume group '%s'"
3187 " - it is either missing or broken", vg_name)
3190 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3191 except (ValueError, TypeError):
3192 _ErrorIf(True, constants.CV_ENODERPC, node,
3193 "node returned invalid LVM info, check LVM status")
3195 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3196 """Gets per-disk status information for all instances.
3198 @type nodelist: list of strings
3199 @param nodelist: Node names
3200 @type node_image: dict of (name, L{objects.Node})
3201 @param node_image: Node objects
3202 @type instanceinfo: dict of (name, L{objects.Instance})
3203 @param instanceinfo: Instance objects
3204 @rtype: {instance: {node: [(success, payload)]}}
3205 @return: a dictionary of per-instance dictionaries with nodes as
3206 keys and disk information as values; the disk information is a
3207 list of tuples (success, payload)
3210 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3213 node_disks_devonly = {}
3214 diskless_instances = set()
3215 diskless = constants.DT_DISKLESS
3217 for nname in nodelist:
3218 node_instances = list(itertools.chain(node_image[nname].pinst,
3219 node_image[nname].sinst))
3220 diskless_instances.update(inst for inst in node_instances
3221 if instanceinfo[inst].disk_template == diskless)
3222 disks = [(inst, disk)
3223 for inst in node_instances
3224 for disk in instanceinfo[inst].disks]
3227 # No need to collect data
3230 node_disks[nname] = disks
3232 # _AnnotateDiskParams already makes copies of the disks
3234 for (inst, dev) in disks:
3235 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3236 self.cfg.SetDiskID(anno_disk, nname)
3237 devonly.append(anno_disk)
3239 node_disks_devonly[nname] = devonly
3241 assert len(node_disks) == len(node_disks_devonly)
3243 # Collect data from all nodes with disks
3244 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3247 assert len(result) == len(node_disks)
3251 for (nname, nres) in result.items():
3252 disks = node_disks[nname]
3255 # No data from this node
3256 data = len(disks) * [(False, "node offline")]
3259 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3260 "while getting disk information: %s", msg)
3262 # No data from this node
3263 data = len(disks) * [(False, msg)]
3266 for idx, i in enumerate(nres.payload):
3267 if isinstance(i, (tuple, list)) and len(i) == 2:
3270 logging.warning("Invalid result from node %s, entry %d: %s",
3272 data.append((False, "Invalid result from the remote node"))
3274 for ((inst, _), status) in zip(disks, data):
3275 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3277 # Add empty entries for diskless instances.
3278 for inst in diskless_instances:
3279 assert inst not in instdisk
3282 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3283 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3284 compat.all(isinstance(s, (tuple, list)) and
3285 len(s) == 2 for s in statuses)
3286 for inst, nnames in instdisk.items()
3287 for nname, statuses in nnames.items())
3289 instdisk_keys = set(instdisk)
3290 instanceinfo_keys = set(instanceinfo)
3291 assert instdisk_keys == instanceinfo_keys, \
3292 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3293 (instdisk_keys, instanceinfo_keys))
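# Result shape sketch (illustrative, made-up names): per instance and node, a
# list with one (success, payload) entry per disk, matching the @rtype above:
#   instdisk = {"inst1.example.com":
#                 {"node1": [(True, <status>), (True, <status>)],
#                  "node2": [(False, "node offline"), (False, "node offline")]}}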
3298 def _SshNodeSelector(group_uuid, all_nodes):
3299 """Create endless iterators for all potential SSH check hosts.
3302 nodes = [node for node in all_nodes
3303 if (node.group != group_uuid and
3305 keyfunc = operator.attrgetter("group")
3307 return map(itertools.cycle,
3308 [sorted(map(operator.attrgetter("name"), names))
3309 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3313 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3314 """Choose which nodes should talk to which other nodes.
3316 We will make nodes contact all nodes in their group, and one node from
3319 @warning: This algorithm has a known issue if one node group is much
3320 smaller than others (e.g. just one node). In such a case all other
3321 nodes will talk to the single node.
3324 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3325 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3327 return (online_nodes,
3328 dict((name, sorted([i.next() for i in sel]))
3329 for name in online_nodes))
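# Result sketch (illustrative, hypothetical names): the online nodes of this
# group plus, for every one of them, one peer from each other group to probe
# over SSH (cycled so the remote load is spread):
#   (["node1", "node2"],
#    {"node1": ["grp2-nodeA"], "node2": ["grp2-nodeB"]})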
3331 def BuildHooksEnv(self):
3334 Cluster-Verify hooks run only in the post phase; their failure is logged
3335 in the verify output and makes the verification fail.
3339 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3342 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3343 for node in self.my_node_info.values())
3347 def BuildHooksNodes(self):
3348 """Build hooks nodes.
3351 return ([], self.my_node_names)
3353 def Exec(self, feedback_fn):
3354 """Verify integrity of the node group, performing various tests on nodes.
3357 # This method has too many local variables. pylint: disable=R0914
3358 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3360 if not self.my_node_names:
3362 feedback_fn("* Empty node group, skipping verification")
3366 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3367 verbose = self.op.verbose
3368 self._feedback_fn = feedback_fn
3370 vg_name = self.cfg.GetVGName()
3371 drbd_helper = self.cfg.GetDRBDHelper()
3372 cluster = self.cfg.GetClusterInfo()
3373 hypervisors = cluster.enabled_hypervisors
3374 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3376 i_non_redundant = [] # Non redundant instances
3377 i_non_a_balanced = [] # Non auto-balanced instances
3378 i_offline = 0 # Count of offline instances
3379 n_offline = 0 # Count of offline nodes
3380 n_drained = 0 # Count of nodes being drained
3381 node_vol_should = {}
3383 # FIXME: verify OS list
3386 filemap = _ComputeAncillaryFiles(cluster, False)
3388 # do local checksums
3389 master_node = self.master_node = self.cfg.GetMasterNode()
3390 master_ip = self.cfg.GetMasterIP()
3392 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3395 if self.cfg.GetUseExternalMipScript():
3396 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3398 node_verify_param = {
3399 constants.NV_FILELIST:
3400 map(vcluster.MakeVirtualPath,
3401 utils.UniqueSequence(filename
3402 for files in filemap
3403 for filename in files)),
3404 constants.NV_NODELIST:
3405 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3406 self.all_node_info.values()),
3407 constants.NV_HYPERVISOR: hypervisors,
3408 constants.NV_HVPARAMS:
3409 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3410 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3411 for node in node_data_list
3412 if not node.offline],
3413 constants.NV_INSTANCELIST: hypervisors,
3414 constants.NV_VERSION: None,
3415 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3416 constants.NV_NODESETUP: None,
3417 constants.NV_TIME: None,
3418 constants.NV_MASTERIP: (master_node, master_ip),
3419 constants.NV_OSLIST: None,
3420 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3421 constants.NV_USERSCRIPTS: user_scripts,
3424 if vg_name is not None:
3425 node_verify_param[constants.NV_VGLIST] = None
3426 node_verify_param[constants.NV_LVLIST] = vg_name
3427 node_verify_param[constants.NV_PVLIST] = [vg_name]
3430 node_verify_param[constants.NV_DRBDLIST] = None
3431 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3433 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3434 # Load file storage paths only from master node
3435 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3438 # FIXME: this needs to be changed per node-group, not cluster-wide
3440 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3441 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3442 bridges.add(default_nicpp[constants.NIC_LINK])
3443 for instance in self.my_inst_info.values():
3444 for nic in instance.nics:
3445 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3446 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447 bridges.add(full_nic[constants.NIC_LINK])
3450 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3452 # Build our expected cluster state
3453 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3455 vm_capable=node.vm_capable))
3456 for node in node_data_list)
3460 for node in self.all_node_info.values():
3461 path = _SupportsOob(self.cfg, node)
3462 if path and path not in oob_paths:
3463 oob_paths.append(path)
3466 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3468 for instance in self.my_inst_names:
3469 inst_config = self.my_inst_info[instance]
3470 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3473 for nname in inst_config.all_nodes:
3474 if nname not in node_image:
3475 gnode = self.NodeImage(name=nname)
3476 gnode.ghost = (nname not in self.all_node_info)
3477 node_image[nname] = gnode
3479 inst_config.MapLVsByNode(node_vol_should)
3481 pnode = inst_config.primary_node
3482 node_image[pnode].pinst.append(instance)
3484 for snode in inst_config.secondary_nodes:
3485 nimg = node_image[snode]
3486 nimg.sinst.append(instance)
3487 if pnode not in nimg.sbp:
3488 nimg.sbp[pnode] = []
3489 nimg.sbp[pnode].append(instance)
3491 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3492 # The value of exclusive_storage should be the same across the group, so if
3493 # it's True for at least one node, we act as if it were set for all the nodes
3494 self._exclusive_storage = compat.any(es_flags.values())
3495 if self._exclusive_storage:
3496 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3498 # At this point, we have the in-memory data structures complete,
3499 # except for the runtime information, which we'll gather next
3501 # Due to the way our RPC system works, exact response times cannot be
3502 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3503 # time before and after executing the request, we can at least have a time
3505 nvinfo_starttime = time.time()
3506 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3508 self.cfg.GetClusterName())
3509 nvinfo_endtime = time.time()
3511 if self.extra_lv_nodes and vg_name is not None:
3513 self.rpc.call_node_verify(self.extra_lv_nodes,
3514 {constants.NV_LVLIST: vg_name},
3515 self.cfg.GetClusterName())
3517 extra_lv_nvinfo = {}
3519 all_drbd_map = self.cfg.ComputeDRBDMap()
3521 feedback_fn("* Gathering disk information (%s nodes)" %
3522 len(self.my_node_names))
3523 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3526 feedback_fn("* Verifying configuration file consistency")
3528 # If not all nodes are being checked, we need to make sure the master node
3529 # and a non-checked vm_capable node are in the list.
3530 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3532 vf_nvinfo = all_nvinfo.copy()
3533 vf_node_info = list(self.my_node_info.values())
3534 additional_nodes = []
3535 if master_node not in self.my_node_info:
3536 additional_nodes.append(master_node)
3537 vf_node_info.append(self.all_node_info[master_node])
3538 # Add the first vm_capable node we find which is not included,
3539 # excluding the master node (which we already have)
3540 for node in absent_nodes:
3541 nodeinfo = self.all_node_info[node]
3542 if (nodeinfo.vm_capable and not nodeinfo.offline and
3543 node != master_node):
3544 additional_nodes.append(node)
3545 vf_node_info.append(self.all_node_info[node])
3547 key = constants.NV_FILELIST
3548 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3549 {key: node_verify_param[key]},
3550 self.cfg.GetClusterName()))
3552 vf_nvinfo = all_nvinfo
3553 vf_node_info = self.my_node_info.values()
3555 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3557 feedback_fn("* Verifying node status")
3561 for node_i in node_data_list:
3563 nimg = node_image[node]
3567 feedback_fn("* Skipping offline node %s" % (node,))
3571 if node == master_node:
3573 elif node_i.master_candidate:
3574 ntype = "master candidate"
3575 elif node_i.drained:
3581 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3583 msg = all_nvinfo[node].fail_msg
3584 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3587 nimg.rpc_fail = True
3590 nresult = all_nvinfo[node].payload
3592 nimg.call_ok = self._VerifyNode(node_i, nresult)
3593 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3594 self._VerifyNodeNetwork(node_i, nresult)
3595 self._VerifyNodeUserScripts(node_i, nresult)
3596 self._VerifyOob(node_i, nresult)
3597 self._VerifyFileStoragePaths(node_i, nresult,
3598 node == master_node)
3601 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3602 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3605 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3606 self._UpdateNodeInstances(node_i, nresult, nimg)
3607 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3608 self._UpdateNodeOS(node_i, nresult, nimg)
3610 if not nimg.os_fail:
3611 if refos_img is None:
3613 self._VerifyNodeOS(node_i, nimg, refos_img)
3614 self._VerifyNodeBridges(node_i, nresult, bridges)
3616 # Check whether all running instances are primary for the node. (This
3617 # can no longer be done from _VerifyInstance below, since some of the
3618 # wrong instances could be from other node groups.)
3619 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3621 for inst in non_primary_inst:
3622 test = inst in self.all_inst_info
3623 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3624 "instance should not run on node %s", node_i.name)
3625 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3626 "node is running unknown instance %s", inst)
3628 self._VerifyGroupLVM(node_image, vg_name)
3630 for node, result in extra_lv_nvinfo.items():
3631 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3632 node_image[node], vg_name)
3634 feedback_fn("* Verifying instance status")
3635 for instance in self.my_inst_names:
3637 feedback_fn("* Verifying instance %s" % instance)
3638 inst_config = self.my_inst_info[instance]
3639 self._VerifyInstance(instance, inst_config, node_image,
3642 # If the instance is non-redundant we cannot survive losing its primary
3643 # node, so we are not N+1 compliant.
3644 if inst_config.disk_template not in constants.DTS_MIRRORED:
3645 i_non_redundant.append(instance)
3647 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3648 i_non_a_balanced.append(instance)
3650 feedback_fn("* Verifying orphan volumes")
3651 reserved = utils.FieldSet(*cluster.reserved_lvs)
3653 # We will get spurious "unknown volume" warnings if any node of this group
3654 # is secondary for an instance whose primary is in another group. To avoid
3655 # them, we find these instances and add their volumes to node_vol_should.
3656 for inst in self.all_inst_info.values():
3657 for secondary in inst.secondary_nodes:
3658 if (secondary in self.my_node_info
3659 and inst.name not in self.my_inst_info):
3660 inst.MapLVsByNode(node_vol_should)
3663 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3665 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3666 feedback_fn("* Verifying N+1 Memory redundancy")
3667 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3669 feedback_fn("* Other Notes")
3671 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3672 % len(i_non_redundant))
3674 if i_non_a_balanced:
3675 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3676 % len(i_non_a_balanced))
3679 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3682 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3685 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3689 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3690 """Analyze the post-hooks' result
3692 This method analyses the hook result, handles it, and sends some
3693 nicely-formatted feedback back to the user.
3695 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3696 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3697 @param hooks_results: the results of the multi-node hooks rpc call
3698 @param feedback_fn: function used to send feedback back to the caller
3699 @param lu_result: previous Exec result
3700 @return: the new Exec result, based on the previous result
3704 # We only really run POST phase hooks, only for non-empty groups,
3705 # and are only interested in their results
3706 if not self.my_node_names:
3709 elif phase == constants.HOOKS_PHASE_POST:
3710 # Used to change hooks' output to proper indentation
3711 feedback_fn("* Hooks Results")
3712 assert hooks_results, "invalid result from hooks"
3714 for node_name in hooks_results:
3715 res = hooks_results[node_name]
3717 test = msg and not res.offline
3718 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3719 "Communication failure in hooks execution: %s", msg)
3720 if res.offline or msg:
3721 # No need to investigate payload if node is offline or gave
3724 for script, hkr, output in res.payload:
3725 test = hkr == constants.HKR_FAIL
3726 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3727 "Script %s failed, output:", script)
3729 output = self._HOOKS_INDENT_RE.sub(" ", output)
3730 feedback_fn("%s" % output)
3736 class LUClusterVerifyDisks(NoHooksLU):
3737 """Verifies the status of the cluster disks.
3742 def ExpandNames(self):
3743 self.share_locks = _ShareAll()
3744 self.needed_locks = {
3745 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3748 def Exec(self, feedback_fn):
3749 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3751 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3752 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3753 for group in group_names])
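# Illustrative sketch (not part of the original module; group names are
# hypothetical): Exec above submits one OpGroupVerifyDisks per node group,
# so with groups "default" and "storage" the returned value would wrap
# something like:
#
#   jobs = [[opcodes.OpGroupVerifyDisks(group_name="default")],
#           [opcodes.OpGroupVerifyDisks(group_name="storage")]]
#   return ResultWithJobs(jobs)
#
# Each inner list is one job to be submitted separately.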
3756 class LUGroupVerifyDisks(NoHooksLU):
3757 """Verifies the status of all disks in a node group.
3762 def ExpandNames(self):
3763 # Raises errors.OpPrereqError on its own if group can't be found
3764 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3766 self.share_locks = _ShareAll()
3767 self.needed_locks = {
3768 locking.LEVEL_INSTANCE: [],
3769 locking.LEVEL_NODEGROUP: [],
3770 locking.LEVEL_NODE: [],
3772 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3773 # starts one instance of this opcode for every group, which means all
3774 # nodes will be locked for a short amount of time, so it's better to
3775 # acquire the node allocation lock as well.
3776 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3779 def DeclareLocks(self, level):
3780 if level == locking.LEVEL_INSTANCE:
3781 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3783 # Lock instances optimistically, needs verification once node and group
3784 # locks have been acquired
3785 self.needed_locks[locking.LEVEL_INSTANCE] = \
3786 self.cfg.GetNodeGroupInstances(self.group_uuid)
3788 elif level == locking.LEVEL_NODEGROUP:
3789 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3791 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3792 set([self.group_uuid] +
3793 # Lock all groups used by instances optimistically; this requires
3794 # going via the node before it's locked, requiring verification
# later on
[group_uuid
3797 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3798 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3800 elif level == locking.LEVEL_NODE:
3801 # This will only lock the nodes in the group to be verified which contain
# actual instances
3803 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3804 self._LockInstancesNodes()
3806 # Lock all nodes in group to be verified
3807 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3808 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3809 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3811 def CheckPrereq(self):
3812 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3813 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3814 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3816 assert self.group_uuid in owned_groups
3818 # Check if locked instances are still correct
3819 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3821 # Get instance information
3822 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3824 # Check if node groups for locked instances are still correct
3825 _CheckInstancesNodeGroups(self.cfg, self.instances,
3826 owned_groups, owned_nodes, self.group_uuid)
3828 def Exec(self, feedback_fn):
3829 """Verify integrity of cluster disks.
3831 @rtype: tuple of three items
3832 @return: a tuple of (dict of node-to-node_error, list of instances
3833 which need activate-disks, dict of instance: (node, volume) for
3838 res_instances = set()
3841 nv_dict = _MapInstanceDisksToNodes(
3842 [inst for inst in self.instances.values()
3843 if inst.admin_state == constants.ADMINST_UP])
3846 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3847 set(self.cfg.GetVmCapableNodeList()))
3849 node_lvs = self.rpc.call_lv_list(nodes, [])
3851 for (node, node_res) in node_lvs.items():
3852 if node_res.offline:
3855 msg = node_res.fail_msg
3857 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3858 res_nodes[node] = msg
3861 for lv_name, (_, _, lv_online) in node_res.payload.items():
3862 inst = nv_dict.pop((node, lv_name), None)
3863 if not (lv_online or inst is None):
3864 res_instances.add(inst)
3866 # any leftover items in nv_dict are missing LVs, let's arrange the data
3868 for key, inst in nv_dict.iteritems():
3869 res_missing.setdefault(inst, []).append(list(key))
3871 return (res_nodes, list(res_instances), res_missing)
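# Illustrative sketch (hypothetical node/instance/volume names, not from the
# original source): shape of the tuple returned by Exec above.
#
#   res_nodes     = {"node3.example.com": "Error enumerating LVs: timeout"}
#   res_instances = ["instance7"]   # instances that need activate-disks
#   res_missing   = {"instance9": [["node2.example.com", "xenvg/disk0"]]}
#
# res_missing maps an instance name to the (node, logical volume) pairs that
# were expected but not reported by that node.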
3874 class LUClusterRepairDiskSizes(NoHooksLU):
3875 """Verifies the cluster disk sizes.
3880 def ExpandNames(self):
3881 if self.op.instances:
3882 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3883 # Not getting the node allocation lock as only a specific set of
3884 # instances (and their nodes) is going to be acquired
3885 self.needed_locks = {
3886 locking.LEVEL_NODE_RES: [],
3887 locking.LEVEL_INSTANCE: self.wanted_names,
3889 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3891 self.wanted_names = None
3892 self.needed_locks = {
3893 locking.LEVEL_NODE_RES: locking.ALL_SET,
3894 locking.LEVEL_INSTANCE: locking.ALL_SET,
3896 # This opcode acquires the node locks for all instances
3897 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3900 self.share_locks = {
3901 locking.LEVEL_NODE_RES: 1,
3902 locking.LEVEL_INSTANCE: 0,
3903 locking.LEVEL_NODE_ALLOC: 1,
3906 def DeclareLocks(self, level):
3907 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3908 self._LockInstancesNodes(primary_only=True, level=level)
3910 def CheckPrereq(self):
3911 """Check prerequisites.
3913 This only checks the optional instance list against the existing names.
3916 if self.wanted_names is None:
3917 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3919 self.wanted_instances = \
3920 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3922 def _EnsureChildSizes(self, disk):
3923 """Ensure children of the disk have the needed disk size.
3925 This is valid mainly for DRBD8 and fixes an issue where the
3926 children have a smaller disk size.
3928 @param disk: an L{ganeti.objects.Disk} object
3931 if disk.dev_type == constants.LD_DRBD8:
3932 assert disk.children, "Empty children for DRBD8?"
3933 fchild = disk.children[0]
3934 mismatch = fchild.size < disk.size
3936 self.LogInfo("Child disk has size %d, parent %d, fixing",
3937 fchild.size, disk.size)
3938 fchild.size = disk.size
3940 # and we recurse on this child only, not on the metadev
3941 return self._EnsureChildSizes(fchild) or mismatch
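# Illustrative sketch (hypothetical sizes, not original code): for a DRBD8
# disk whose data child was recorded smaller than the parent,
# _EnsureChildSizes grows the child's recorded size to match:
#
#   disk.size             # e.g. 10240 MiB recorded for the DRBD8 device
#   disk.children[0].size # e.g. 10236 -> reset to 10240, returns True
#
# A True return value tells the caller that the configuration object was
# changed and has to be written back via cfg.Update (see Exec below).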
3945 def Exec(self, feedback_fn):
3946 """Verify the size of cluster disks.
3949 # TODO: check child disks too
3950 # TODO: check differences in size between primary/secondary nodes
3952 for instance in self.wanted_instances:
3953 pnode = instance.primary_node
3954 if pnode not in per_node_disks:
3955 per_node_disks[pnode] = []
3956 for idx, disk in enumerate(instance.disks):
3957 per_node_disks[pnode].append((instance, idx, disk))
3959 assert not (frozenset(per_node_disks.keys()) -
3960 self.owned_locks(locking.LEVEL_NODE_RES)), \
3961 "Not owning correct locks"
3962 assert not self.owned_locks(locking.LEVEL_NODE)
3965 for node, dskl in per_node_disks.items():
3966 newl = [v[2].Copy() for v in dskl]
3968 self.cfg.SetDiskID(dsk, node)
3969 result = self.rpc.call_blockdev_getsize(node, newl)
3971 self.LogWarning("Failure in blockdev_getsize call to node"
3972 " %s, ignoring", node)
3974 if len(result.payload) != len(dskl):
3975 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3976 " result.payload=%s", node, len(dskl), result.payload)
3977 self.LogWarning("Invalid result from node %s, ignoring node results",
3980 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3982 self.LogWarning("Disk %d of instance %s did not return size"
3983 " information, ignoring", idx, instance.name)
3985 if not isinstance(size, (int, long)):
3986 self.LogWarning("Disk %d of instance %s did not return valid"
3987 " size information, ignoring", idx, instance.name)
3990 if size != disk.size:
3991 self.LogInfo("Disk %d of instance %s has mismatched size,"
3992 " correcting: recorded %d, actual %d", idx,
3993 instance.name, disk.size, size)
3995 self.cfg.Update(instance, feedback_fn)
3996 changed.append((instance.name, idx, size))
3997 if self._EnsureChildSizes(disk):
3998 self.cfg.Update(instance, feedback_fn)
3999 changed.append((instance.name, idx, disk.size))
4003 class LUClusterRename(LogicalUnit):
4004 """Rename the cluster.
4007 HPATH = "cluster-rename"
4008 HTYPE = constants.HTYPE_CLUSTER
4010 def BuildHooksEnv(self):
4015 "OP_TARGET": self.cfg.GetClusterName(),
4016 "NEW_NAME": self.op.name,
4019 def BuildHooksNodes(self):
4020 """Build hooks nodes.
4023 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4025 def CheckPrereq(self):
4026 """Verify that the passed name is a valid one.
4029 hostname = netutils.GetHostname(name=self.op.name,
4030 family=self.cfg.GetPrimaryIPFamily())
4032 new_name = hostname.name
4033 self.ip = new_ip = hostname.ip
4034 old_name = self.cfg.GetClusterName()
4035 old_ip = self.cfg.GetMasterIP()
4036 if new_name == old_name and new_ip == old_ip:
4037 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4038 " cluster has changed",
4040 if new_ip != old_ip:
4041 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4042 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4043 " reachable on the network" %
4044 new_ip, errors.ECODE_NOTUNIQUE)
4046 self.op.name = new_name
4048 def Exec(self, feedback_fn):
4049 """Rename the cluster.
4052 clustername = self.op.name
4055 # shutdown the master IP
4056 master_params = self.cfg.GetMasterNetworkParameters()
4057 ems = self.cfg.GetUseExternalMipScript()
4058 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4060 result.Raise("Could not disable the master role")
4063 cluster = self.cfg.GetClusterInfo()
4064 cluster.cluster_name = clustername
4065 cluster.master_ip = new_ip
4066 self.cfg.Update(cluster, feedback_fn)
4068 # update the known hosts file
4069 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4070 node_list = self.cfg.GetOnlineNodeList()
4072 node_list.remove(master_params.name)
4075 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4077 master_params.ip = new_ip
4078 result = self.rpc.call_node_activate_master_ip(master_params.name,
4080 msg = result.fail_msg
4082 self.LogWarning("Could not re-enable the master role on"
4083 " the master, please restart manually: %s", msg)
4088 def _ValidateNetmask(cfg, netmask):
4089 """Checks if a netmask is valid.
4091 @type cfg: L{config.ConfigWriter}
4092 @param cfg: The cluster configuration
4094 @param netmask: the netmask to be verified
4095 @raise errors.OpPrereqError: if the validation fails
4098 ip_family = cfg.GetPrimaryIPFamily()
4100 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4101 except errors.ProgrammerError:
4102 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4103 ip_family, errors.ECODE_INVAL)
4104 if not ipcls.ValidateNetmask(netmask):
4105 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4106 (netmask), errors.ECODE_INVAL)
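# Illustrative sketch (hypothetical values): the master netmask checked here
# is a CIDR prefix length validated against the cluster's primary IP family,
# so assuming an IPv4 primary family something like
#
#   _ValidateNetmask(cfg, 24)   # passes
#   _ValidateNetmask(cfg, 33)   # raises errors.OpPrereqError (ECODE_INVAL)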
4109 class LUClusterSetParams(LogicalUnit):
4110 """Change the parameters of the cluster.
4113 HPATH = "cluster-modify"
4114 HTYPE = constants.HTYPE_CLUSTER
4117 def CheckArguments(self):
4121 if self.op.uid_pool:
4122 uidpool.CheckUidPool(self.op.uid_pool)
4124 if self.op.add_uids:
4125 uidpool.CheckUidPool(self.op.add_uids)
4127 if self.op.remove_uids:
4128 uidpool.CheckUidPool(self.op.remove_uids)
4130 if self.op.master_netmask is not None:
4131 _ValidateNetmask(self.cfg, self.op.master_netmask)
4133 if self.op.diskparams:
4134 for dt_params in self.op.diskparams.values():
4135 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4137 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4138 except errors.OpPrereqError, err:
4139 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4142 def ExpandNames(self):
4143 # FIXME: in the future maybe other cluster params won't require checking on
4144 # all nodes to be modified.
4145 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4146 # resource locks the right thing, shouldn't it be the BGL instead?
4147 self.needed_locks = {
4148 locking.LEVEL_NODE: locking.ALL_SET,
4149 locking.LEVEL_INSTANCE: locking.ALL_SET,
4150 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4151 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4153 self.share_locks = _ShareAll()
4155 def BuildHooksEnv(self):
4160 "OP_TARGET": self.cfg.GetClusterName(),
4161 "NEW_VG_NAME": self.op.vg_name,
4164 def BuildHooksNodes(self):
4165 """Build hooks nodes.
4168 mn = self.cfg.GetMasterNode()
4171 def CheckPrereq(self):
4172 """Check prerequisites.
4174 This checks whether the given params don't conflict and
4175 if the given volume group is valid.
4178 if self.op.vg_name is not None and not self.op.vg_name:
4179 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4180 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4181 " instances exist", errors.ECODE_INVAL)
4183 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4184 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4185 raise errors.OpPrereqError("Cannot disable drbd helper while"
4186 " drbd-based instances exist",
4189 node_list = self.owned_locks(locking.LEVEL_NODE)
4191 # if vg_name is not None, check the given volume group on all nodes
4193 vglist = self.rpc.call_vg_list(node_list)
4194 for node in node_list:
4195 msg = vglist[node].fail_msg
4197 # ignoring down node
4198 self.LogWarning("Error while gathering data on node %s"
4199 " (ignoring node): %s", node, msg)
4201 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4203 constants.MIN_VG_SIZE)
4205 raise errors.OpPrereqError("Error on node '%s': %s" %
4206 (node, vgstatus), errors.ECODE_ENVIRON)
4208 if self.op.drbd_helper:
4209 # checks given drbd helper on all nodes
4210 helpers = self.rpc.call_drbd_helper(node_list)
4211 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4213 self.LogInfo("Not checking drbd helper on offline node %s", node)
4215 msg = helpers[node].fail_msg
4217 raise errors.OpPrereqError("Error checking drbd helper on node"
4218 " '%s': %s" % (node, msg),
4219 errors.ECODE_ENVIRON)
4220 node_helper = helpers[node].payload
4221 if node_helper != self.op.drbd_helper:
4222 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4223 (node, node_helper), errors.ECODE_ENVIRON)
4225 self.cluster = cluster = self.cfg.GetClusterInfo()
4226 # validate params changes
4227 if self.op.beparams:
4228 objects.UpgradeBeParams(self.op.beparams)
4229 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4230 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4232 if self.op.ndparams:
4233 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4234 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4236 # TODO: we need a more general way to handle resetting
4237 # cluster-level parameters to default values
4238 if self.new_ndparams["oob_program"] == "":
4239 self.new_ndparams["oob_program"] = \
4240 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4242 if self.op.hv_state:
4243 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4244 self.cluster.hv_state_static)
4245 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4246 for hv, values in new_hv_state.items())
4248 if self.op.disk_state:
4249 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4250 self.cluster.disk_state_static)
4251 self.new_disk_state = \
4252 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4253 for name, values in svalues.items()))
4254 for storage, svalues in new_disk_state.items())
4257 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4260 all_instances = self.cfg.GetAllInstancesInfo().values()
4262 for group in self.cfg.GetAllNodeGroupsInfo().values():
4263 instances = frozenset([inst for inst in all_instances
4264 if compat.any(node in group.members
4265 for node in inst.all_nodes)])
4266 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4267 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4268 new = _ComputeNewInstanceViolations(ipol,
4269 new_ipolicy, instances)
4271 violations.update(new)
4274 self.LogWarning("After the ipolicy change the following instances"
4275 " violate them: %s",
4276 utils.CommaJoin(utils.NiceSort(violations)))
4278 if self.op.nicparams:
4279 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4280 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4281 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4284 # check all instances for consistency
4285 for instance in self.cfg.GetAllInstancesInfo().values():
4286 for nic_idx, nic in enumerate(instance.nics):
4287 params_copy = copy.deepcopy(nic.nicparams)
4288 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4290 # check parameter syntax
4292 objects.NIC.CheckParameterSyntax(params_filled)
4293 except errors.ConfigurationError, err:
4294 nic_errors.append("Instance %s, nic/%d: %s" %
4295 (instance.name, nic_idx, err))
4297 # if we're moving instances to routed, check that they have an ip
4298 target_mode = params_filled[constants.NIC_MODE]
4299 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4300 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4301 " address" % (instance.name, nic_idx))
4303 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4304 "\n".join(nic_errors), errors.ECODE_INVAL)
4306 # hypervisor list/parameters
4307 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4308 if self.op.hvparams:
4309 for hv_name, hv_dict in self.op.hvparams.items():
4310 if hv_name not in self.new_hvparams:
4311 self.new_hvparams[hv_name] = hv_dict
4313 self.new_hvparams[hv_name].update(hv_dict)
4315 # disk template parameters
4316 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4317 if self.op.diskparams:
4318 for dt_name, dt_params in self.op.diskparams.items():
4319 if dt_name not in self.new_diskparams:
4320 self.new_diskparams[dt_name] = dt_params
4322 self.new_diskparams[dt_name].update(dt_params)
4324 # os hypervisor parameters
4325 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4327 for os_name, hvs in self.op.os_hvp.items():
4328 if os_name not in self.new_os_hvp:
4329 self.new_os_hvp[os_name] = hvs
4331 for hv_name, hv_dict in hvs.items():
4333 # Delete if it exists
4334 self.new_os_hvp[os_name].pop(hv_name, None)
4335 elif hv_name not in self.new_os_hvp[os_name]:
4336 self.new_os_hvp[os_name][hv_name] = hv_dict
4338 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4341 self.new_osp = objects.FillDict(cluster.osparams, {})
4342 if self.op.osparams:
4343 for os_name, osp in self.op.osparams.items():
4344 if os_name not in self.new_osp:
4345 self.new_osp[os_name] = {}
4347 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4350 if not self.new_osp[os_name]:
4351 # we removed all parameters
4352 del self.new_osp[os_name]
4354 # check the parameter validity (remote check)
4355 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4356 os_name, self.new_osp[os_name])
4358 # changes to the hypervisor list
4359 if self.op.enabled_hypervisors is not None:
4360 self.hv_list = self.op.enabled_hypervisors
4361 for hv in self.hv_list:
4362 # if the hypervisor doesn't already exist in the cluster
4363 # hvparams, we initialize it to empty, and then (in both
4364 # cases) we make sure to fill the defaults, as we might not
4365 # have a complete defaults list if the hypervisor wasn't
# enabled before
4367 if hv not in new_hvp:
4369 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4370 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4372 self.hv_list = cluster.enabled_hypervisors
4374 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4375 # either the enabled list has changed, or the parameters have, validate
4376 for hv_name, hv_params in self.new_hvparams.items():
4377 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4378 (self.op.enabled_hypervisors and
4379 hv_name in self.op.enabled_hypervisors)):
4380 # either this is a new hypervisor, or its parameters have changed
4381 hv_class = hypervisor.GetHypervisorClass(hv_name)
4382 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4383 hv_class.CheckParameterSyntax(hv_params)
4384 _CheckHVParams(self, node_list, hv_name, hv_params)
4387 # no need to check any newly-enabled hypervisors, since the
4388 # defaults have already been checked in the above code-block
4389 for os_name, os_hvp in self.new_os_hvp.items():
4390 for hv_name, hv_params in os_hvp.items():
4391 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4392 # we need to fill in the new os_hvp on top of the actual hv_p
4393 cluster_defaults = self.new_hvparams.get(hv_name, {})
4394 new_osp = objects.FillDict(cluster_defaults, hv_params)
4395 hv_class = hypervisor.GetHypervisorClass(hv_name)
4396 hv_class.CheckParameterSyntax(new_osp)
4397 _CheckHVParams(self, node_list, hv_name, new_osp)
4399 if self.op.default_iallocator:
4400 alloc_script = utils.FindFile(self.op.default_iallocator,
4401 constants.IALLOCATOR_SEARCH_PATH,
4403 if alloc_script is None:
4404 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4405 " specified" % self.op.default_iallocator,
4408 def Exec(self, feedback_fn):
4409 """Change the parameters of the cluster.
4412 if self.op.vg_name is not None:
4413 new_volume = self.op.vg_name
4416 if new_volume != self.cfg.GetVGName():
4417 self.cfg.SetVGName(new_volume)
4419 feedback_fn("Cluster LVM configuration already in desired"
4420 " state, not changing")
4421 if self.op.drbd_helper is not None:
4422 new_helper = self.op.drbd_helper
4425 if new_helper != self.cfg.GetDRBDHelper():
4426 self.cfg.SetDRBDHelper(new_helper)
4428 feedback_fn("Cluster DRBD helper already in desired state,"
4430 if self.op.hvparams:
4431 self.cluster.hvparams = self.new_hvparams
4433 self.cluster.os_hvp = self.new_os_hvp
4434 if self.op.enabled_hypervisors is not None:
4435 self.cluster.hvparams = self.new_hvparams
4436 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4437 if self.op.beparams:
4438 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4439 if self.op.nicparams:
4440 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4442 self.cluster.ipolicy = self.new_ipolicy
4443 if self.op.osparams:
4444 self.cluster.osparams = self.new_osp
4445 if self.op.ndparams:
4446 self.cluster.ndparams = self.new_ndparams
4447 if self.op.diskparams:
4448 self.cluster.diskparams = self.new_diskparams
4449 if self.op.hv_state:
4450 self.cluster.hv_state_static = self.new_hv_state
4451 if self.op.disk_state:
4452 self.cluster.disk_state_static = self.new_disk_state
4454 if self.op.candidate_pool_size is not None:
4455 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4456 # we need to update the pool size here, otherwise the save will fail
4457 _AdjustCandidatePool(self, [])
4459 if self.op.maintain_node_health is not None:
4460 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4461 feedback_fn("Note: CONFD was disabled at build time, node health"
4462 " maintenance is not useful (still enabling it)")
4463 self.cluster.maintain_node_health = self.op.maintain_node_health
4465 if self.op.prealloc_wipe_disks is not None:
4466 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4468 if self.op.add_uids is not None:
4469 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4471 if self.op.remove_uids is not None:
4472 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4474 if self.op.uid_pool is not None:
4475 self.cluster.uid_pool = self.op.uid_pool
4477 if self.op.default_iallocator is not None:
4478 self.cluster.default_iallocator = self.op.default_iallocator
4480 if self.op.reserved_lvs is not None:
4481 self.cluster.reserved_lvs = self.op.reserved_lvs
4483 if self.op.use_external_mip_script is not None:
4484 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4486 def helper_os(aname, mods, desc):
4488 lst = getattr(self.cluster, aname)
4489 for key, val in mods:
4490 if key == constants.DDM_ADD:
4492 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4495 elif key == constants.DDM_REMOVE:
4499 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4501 raise errors.ProgrammerError("Invalid modification '%s'" % key)
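# Illustrative sketch (hypothetical OS names): the hidden_os/blacklisted_os
# options handled below are lists of (action, os_name) pairs, e.g.
#
#   self.op.hidden_os = [(constants.DDM_ADD, "debian-image"),
#                        (constants.DDM_REMOVE, "lenny-image")]
#
# helper_os() applies each pair to the corresponding cluster list, only
# warning when an addition already exists or a removal is not found.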
4503 if self.op.hidden_os:
4504 helper_os("hidden_os", self.op.hidden_os, "hidden")
4506 if self.op.blacklisted_os:
4507 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4509 if self.op.master_netdev:
4510 master_params = self.cfg.GetMasterNetworkParameters()
4511 ems = self.cfg.GetUseExternalMipScript()
4512 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4513 self.cluster.master_netdev)
4514 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4516 result.Raise("Could not disable the master ip")
4517 feedback_fn("Changing master_netdev from %s to %s" %
4518 (master_params.netdev, self.op.master_netdev))
4519 self.cluster.master_netdev = self.op.master_netdev
4521 if self.op.master_netmask:
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4524 result = self.rpc.call_node_change_master_netmask(master_params.name,
4525 master_params.netmask,
4526 self.op.master_netmask,
4528 master_params.netdev)
4530 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4533 self.cluster.master_netmask = self.op.master_netmask
4535 self.cfg.Update(self.cluster, feedback_fn)
4537 if self.op.master_netdev:
4538 master_params = self.cfg.GetMasterNetworkParameters()
4539 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4540 self.op.master_netdev)
4541 ems = self.cfg.GetUseExternalMipScript()
4542 result = self.rpc.call_node_activate_master_ip(master_params.name,
4545 self.LogWarning("Could not re-enable the master ip on"
4546 " the master, please restart manually: %s",
4550 def _UploadHelper(lu, nodes, fname):
4551 """Helper for uploading a file and showing warnings.
4554 if os.path.exists(fname):
4555 result = lu.rpc.call_upload_file(nodes, fname)
4556 for to_node, to_result in result.items():
4557 msg = to_result.fail_msg
4559 msg = ("Copy of file %s to node %s failed: %s" %
4560 (fname, to_node, msg))
4564 def _ComputeAncillaryFiles(cluster, redist):
4565 """Compute files external to Ganeti which need to be consistent.
4567 @type redist: boolean
4568 @param redist: Whether to include files which need to be redistributed
4571 # Compute files for all nodes
4573 pathutils.SSH_KNOWN_HOSTS_FILE,
4574 pathutils.CONFD_HMAC_KEY,
4575 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4576 pathutils.SPICE_CERT_FILE,
4577 pathutils.SPICE_CACERT_FILE,
4578 pathutils.RAPI_USERS_FILE,
4582 # we need to ship at least the RAPI certificate
4583 files_all.add(pathutils.RAPI_CERT_FILE)
4585 files_all.update(pathutils.ALL_CERT_FILES)
4586 files_all.update(ssconf.SimpleStore().GetFileList())
4588 if cluster.modify_etc_hosts:
4589 files_all.add(pathutils.ETC_HOSTS)
4591 if cluster.use_external_mip_script:
4592 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4594 # Files which are optional; these must:
4595 # - be present in one other category as well
4596 # - either exist or not exist on all nodes of that category (mc, vm all)
4598 pathutils.RAPI_USERS_FILE,
4601 # Files which should only be on master candidates
4605 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4609 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4610 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4611 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4613 # Files which should only be on VM-capable nodes
4616 for hv_name in cluster.enabled_hypervisors
4618 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4622 for hv_name in cluster.enabled_hypervisors
4624 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4626 # Filenames in each category must be unique
4627 all_files_set = files_all | files_mc | files_vm
4628 assert (len(all_files_set) ==
4629 sum(map(len, [files_all, files_mc, files_vm]))), \
4630 "Found file listed in more than one file list"
4632 # Optional files must be present in one other category
4633 assert all_files_set.issuperset(files_opt), \
4634 "Optional file not in a different required list"
4636 # This one file should never ever be re-distributed via RPC
4637 assert not (redist and
4638 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4640 return (files_all, files_opt, files_mc, files_vm)
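# Illustrative sketch (paths only as examples from the lists built above):
# the returned 4-tuple groups files by where they must be present:
#
#   files_all  # every node, e.g. pathutils.SSH_KNOWN_HOSTS_FILE
#   files_opt  # optional; exist on all nodes of a category or on none
#   files_mc   # master candidates only, e.g. pathutils.CLUSTER_CONF_FILE
#   files_vm   # VM-capable nodes only (hypervisor ancillary files)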
4643 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4644 """Distribute additional files which are part of the cluster configuration.
4646 ConfigWriter takes care of distributing the config and ssconf files, but
4647 there are more files which should be distributed to all nodes. This function
4648 makes sure those are copied.
4650 @param lu: calling logical unit
4651 @param additional_nodes: list of nodes not in the config to distribute to
4652 @type additional_vm: boolean
4653 @param additional_vm: whether the additional nodes are vm-capable or not
4656 # Gather target nodes
4657 cluster = lu.cfg.GetClusterInfo()
4658 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4660 online_nodes = lu.cfg.GetOnlineNodeList()
4661 online_set = frozenset(online_nodes)
4662 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4664 if additional_nodes is not None:
4665 online_nodes.extend(additional_nodes)
4667 vm_nodes.extend(additional_nodes)
4669 # Never distribute to master node
4670 for nodelist in [online_nodes, vm_nodes]:
4671 if master_info.name in nodelist:
4672 nodelist.remove(master_info.name)
4675 (files_all, _, files_mc, files_vm) = \
4676 _ComputeAncillaryFiles(cluster, True)
4678 # Never re-distribute configuration file from here
4679 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4680 pathutils.CLUSTER_CONF_FILE in files_vm)
4681 assert not files_mc, "Master candidates not handled in this function"
4684 (online_nodes, files_all),
4685 (vm_nodes, files_vm),
4689 for (node_list, files) in filemap:
4691 _UploadHelper(lu, node_list, fname)
4694 class LUClusterRedistConf(NoHooksLU):
4695 """Force the redistribution of cluster configuration.
4697 This is a very simple LU.
4702 def ExpandNames(self):
4703 self.needed_locks = {
4704 locking.LEVEL_NODE: locking.ALL_SET,
4705 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4707 self.share_locks = _ShareAll()
4709 def Exec(self, feedback_fn):
4710 """Redistribute the configuration.
4713 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4714 _RedistributeAncillaryFiles(self)
4717 class LUClusterActivateMasterIp(NoHooksLU):
4718 """Activate the master IP on the master node.
4721 def Exec(self, feedback_fn):
4722 """Activate the master IP.
4725 master_params = self.cfg.GetMasterNetworkParameters()
4726 ems = self.cfg.GetUseExternalMipScript()
4727 result = self.rpc.call_node_activate_master_ip(master_params.name,
4729 result.Raise("Could not activate the master IP")
4732 class LUClusterDeactivateMasterIp(NoHooksLU):
4733 """Deactivate the master IP on the master node.
4736 def Exec(self, feedback_fn):
4737 """Deactivate the master IP.
4740 master_params = self.cfg.GetMasterNetworkParameters()
4741 ems = self.cfg.GetUseExternalMipScript()
4742 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4744 result.Raise("Could not deactivate the master IP")
4747 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4748 """Sleep and poll for an instance's disk to sync.
4751 if not instance.disks or disks is not None and not disks:
4754 disks = _ExpandCheckDisks(instance, disks)
4757 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4759 node = instance.primary_node
4762 lu.cfg.SetDiskID(dev, node)
4764 # TODO: Convert to utils.Retry
4767 degr_retries = 10 # in seconds, as we sleep 1 second each time
4771 cumul_degraded = False
4772 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4773 msg = rstats.fail_msg
4775 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4778 raise errors.RemoteError("Can't contact node %s for mirror data,"
4779 " aborting." % node)
4782 rstats = rstats.payload
4784 for i, mstat in enumerate(rstats):
4786 lu.LogWarning("Can't compute data for node %s/%s",
4787 node, disks[i].iv_name)
4790 cumul_degraded = (cumul_degraded or
4791 (mstat.is_degraded and mstat.sync_percent is None))
4792 if mstat.sync_percent is not None:
4794 if mstat.estimated_time is not None:
4795 rem_time = ("%s remaining (estimated)" %
4796 utils.FormatSeconds(mstat.estimated_time))
4797 max_time = mstat.estimated_time
4799 rem_time = "no time estimate"
4800 lu.LogInfo("- device %s: %5.2f%% done, %s",
4801 disks[i].iv_name, mstat.sync_percent, rem_time)
4803 # if we're done but degraded, let's do a few small retries, to
4804 # make sure we see a stable and not transient situation; therefore
4805 # we force restart of the loop
4806 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4807 logging.info("Degraded disks found, %d retries left", degr_retries)
4815 time.sleep(min(60, max_time))
4818 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4820 return not cumul_degraded
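# Illustrative sketch (hypothetical caller, not original code): the boolean
# returned by _WaitForSync is True only when no disk was left degraded, so a
# caller might react roughly like:
#
#   if not _WaitForSync(lu, instance):
#       lu.LogWarning("Disk sync did not complete cleanly")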
4823 def _BlockdevFind(lu, node, dev, instance):
4824 """Wrapper around call_blockdev_find to annotate diskparams.
4826 @param lu: A reference to the lu object
4827 @param node: The node to call out
4828 @param dev: The device to find
4829 @param instance: The instance object the device belongs to
4830 @returns The result of the rpc call
4833 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4834 return lu.rpc.call_blockdev_find(node, disk)
4837 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4838 """Wrapper around L{_CheckDiskConsistencyInner}.
4841 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4842 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4846 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4848 """Check that mirrors are not degraded.
4850 @attention: The device has to be annotated already.
4852 The ldisk parameter, if True, will change the test from the
4853 is_degraded attribute (which represents overall non-ok status for
4854 the device(s)) to the ldisk (representing the local storage status).
4857 lu.cfg.SetDiskID(dev, node)
4861 if on_primary or dev.AssembleOnSecondary():
4862 rstats = lu.rpc.call_blockdev_find(node, dev)
4863 msg = rstats.fail_msg
4865 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4867 elif not rstats.payload:
4868 lu.LogWarning("Can't find disk on node %s", node)
4872 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4874 result = result and not rstats.payload.is_degraded
4877 for child in dev.children:
4878 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4884 class LUOobCommand(NoHooksLU):
4885 """Logical unit for OOB handling.
4889 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4891 def ExpandNames(self):
4892 """Gather locks we need.
4895 if self.op.node_names:
4896 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4897 lock_names = self.op.node_names
4899 lock_names = locking.ALL_SET
4901 self.needed_locks = {
4902 locking.LEVEL_NODE: lock_names,
4905 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4907 if not self.op.node_names:
4908 # Acquire node allocation lock only if all nodes are affected
4909 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4911 def CheckPrereq(self):
4912 """Check prerequisites.
4915 - the node exists in the configuration
4918 Any errors are signaled by raising errors.OpPrereqError.
4922 self.master_node = self.cfg.GetMasterNode()
4924 assert self.op.power_delay >= 0.0
4926 if self.op.node_names:
4927 if (self.op.command in self._SKIP_MASTER and
4928 self.master_node in self.op.node_names):
4929 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4930 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4932 if master_oob_handler:
4933 additional_text = ("run '%s %s %s' if you want to operate on the"
4934 " master regardless") % (master_oob_handler,
4938 additional_text = "it does not support out-of-band operations"
4940 raise errors.OpPrereqError(("Operating on the master node %s is not"
4941 " allowed for %s; %s") %
4942 (self.master_node, self.op.command,
4943 additional_text), errors.ECODE_INVAL)
4945 self.op.node_names = self.cfg.GetNodeList()
4946 if self.op.command in self._SKIP_MASTER:
4947 self.op.node_names.remove(self.master_node)
4949 if self.op.command in self._SKIP_MASTER:
4950 assert self.master_node not in self.op.node_names
4952 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4954 raise errors.OpPrereqError("Node %s not found" % node_name,
4957 self.nodes.append(node)
4959 if (not self.op.ignore_status and
4960 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4961 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4962 " not marked offline") % node_name,
4965 def Exec(self, feedback_fn):
4966 """Execute OOB and return result if we expect any.
4969 master_node = self.master_node
4972 for idx, node in enumerate(utils.NiceSort(self.nodes,
4973 key=lambda node: node.name)):
4974 node_entry = [(constants.RS_NORMAL, node.name)]
4975 ret.append(node_entry)
4977 oob_program = _SupportsOob(self.cfg, node)
4980 node_entry.append((constants.RS_UNAVAIL, None))
4983 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4984 self.op.command, oob_program, node.name)
4985 result = self.rpc.call_run_oob(master_node, oob_program,
4986 self.op.command, node.name,
4990 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4991 node.name, result.fail_msg)
4992 node_entry.append((constants.RS_NODATA, None))
4995 self._CheckPayload(result)
4996 except errors.OpExecError, err:
4997 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4999 node_entry.append((constants.RS_NODATA, None))
5001 if self.op.command == constants.OOB_HEALTH:
5002 # For health we should log important events
5003 for item, status in result.payload:
5004 if status in [constants.OOB_STATUS_WARNING,
5005 constants.OOB_STATUS_CRITICAL]:
5006 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5007 item, node.name, status)
5009 if self.op.command == constants.OOB_POWER_ON:
5011 elif self.op.command == constants.OOB_POWER_OFF:
5012 node.powered = False
5013 elif self.op.command == constants.OOB_POWER_STATUS:
5014 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5015 if powered != node.powered:
5016 logging.warning(("Recorded power state (%s) of node '%s' does not"
5017 " match actual power state (%s)"), node.powered,
5020 # For configuration changing commands we should update the node
5021 if self.op.command in (constants.OOB_POWER_ON,
5022 constants.OOB_POWER_OFF):
5023 self.cfg.Update(node, feedback_fn)
5025 node_entry.append((constants.RS_NORMAL, result.payload))
5027 if (self.op.command == constants.OOB_POWER_ON and
5028 idx < len(self.nodes) - 1):
5029 time.sleep(self.op.power_delay)
5033 def _CheckPayload(self, result):
5034 """Checks if the payload is valid.
5036 @param result: RPC result
5037 @raises errors.OpExecError: If payload is not valid
5041 if self.op.command == constants.OOB_HEALTH:
5042 if not isinstance(result.payload, list):
5043 errs.append("command 'health' is expected to return a list but got %s" %
5044 type(result.payload))
5046 for item, status in result.payload:
5047 if status not in constants.OOB_STATUSES:
5048 errs.append("health item '%s' has invalid status '%s'" %
5051 if self.op.command == constants.OOB_POWER_STATUS:
5052 if not isinstance(result.payload, dict):
5053 errs.append("power-status is expected to return a dict but got %s" %
5054 type(result.payload))
5056 if self.op.command in [
5057 constants.OOB_POWER_ON,
5058 constants.OOB_POWER_OFF,
5059 constants.OOB_POWER_CYCLE,
5061 if result.payload is not None:
5062 errs.append("%s is expected to not return payload but got '%s'" %
5063 (self.op.command, result.payload))
5066 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5067 utils.CommaJoin(errs))
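# Illustrative sketch (hypothetical item names): the payload shapes accepted
# by _CheckPayload above differ per OOB command:
#
#   OOB_HEALTH              -> [("disk0", constants.OOB_STATUS_WARNING), ...]
#   OOB_POWER_STATUS        -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE  -> None (no payload expected)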
5070 class _OsQuery(_QueryBase):
5071 FIELDS = query.OS_FIELDS
5073 def ExpandNames(self, lu):
5074 # Lock all nodes in shared mode
5075 # Temporary removal of locks, should be reverted later
5076 # TODO: reintroduce locks when they are lighter-weight
5077 lu.needed_locks = {}
5078 #self.share_locks[locking.LEVEL_NODE] = 1
5079 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5081 # The following variables interact with _QueryBase._GetNames
5083 self.wanted = self.names
5085 self.wanted = locking.ALL_SET
5087 self.do_locking = self.use_locking
5089 def DeclareLocks(self, lu, level):
5093 def _DiagnoseByOS(rlist):
5094 """Remaps a per-node return list into a per-os per-node dictionary
5096 @param rlist: a map with node names as keys and OS objects as values
5099 @return: a dictionary with osnames as keys and as value another
5100 map, with nodes as keys and tuples of (path, status, diagnose,
5101 variants, parameters, api_versions) as values, eg::
5103 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5104 (/srv/..., False, "invalid api")],
5105 "node2": [(/srv/..., True, "", [], [])]}
5110 # we build here the list of nodes that didn't fail the RPC (at RPC
5111 # level), so that nodes with a non-responding node daemon don't
5112 # make all OSes invalid
5113 good_nodes = [node_name for node_name in rlist
5114 if not rlist[node_name].fail_msg]
5115 for node_name, nr in rlist.items():
5116 if nr.fail_msg or not nr.payload:
5118 for (name, path, status, diagnose, variants,
5119 params, api_versions) in nr.payload:
5120 if name not in all_os:
5121 # build a list of nodes for this os containing empty lists
5122 # for each node in node_list
5124 for nname in good_nodes:
5125 all_os[name][nname] = []
5126 # convert params from [name, help] to (name, help)
5127 params = [tuple(v) for v in params]
5128 all_os[name][node_name].append((path, status, diagnose,
5129 variants, params, api_versions))
5132 def _GetQueryData(self, lu):
5133 """Computes the list of nodes and their attributes.
5136 # Locking is not used
5137 assert not (compat.any(lu.glm.is_owned(level)
5138 for level in locking.LEVELS
5139 if level != locking.LEVEL_CLUSTER) or
5140 self.do_locking or self.use_locking)
5142 valid_nodes = [node.name
5143 for node in lu.cfg.GetAllNodesInfo().values()
5144 if not node.offline and node.vm_capable]
5145 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5146 cluster = lu.cfg.GetClusterInfo()
5150 for (os_name, os_data) in pol.items():
5151 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5152 hidden=(os_name in cluster.hidden_os),
5153 blacklisted=(os_name in cluster.blacklisted_os))
5157 api_versions = set()
5159 for idx, osl in enumerate(os_data.values()):
5160 info.valid = bool(info.valid and osl and osl[0][1])
5164 (node_variants, node_params, node_api) = osl[0][3:6]
5167 variants.update(node_variants)
5168 parameters.update(node_params)
5169 api_versions.update(node_api)
5171 # Filter out inconsistent values
5172 variants.intersection_update(node_variants)
5173 parameters.intersection_update(node_params)
5174 api_versions.intersection_update(node_api)
5176 info.variants = list(variants)
5177 info.parameters = list(parameters)
5178 info.api_versions = list(api_versions)
5180 data[os_name] = info
5182 # Prepare data in requested order
5183 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5187 class LUOsDiagnose(NoHooksLU):
5188 """Logical unit for OS diagnose/query.
5194 def _BuildFilter(fields, names):
5195 """Builds a filter for querying OSes.
5198 name_filter = qlang.MakeSimpleFilter("name", names)
5200 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5201 # respective field is not requested
5202 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5203 for fname in ["hidden", "blacklisted"]
5204 if fname not in fields]
5205 if "valid" not in fields:
5206 status_filter.append([qlang.OP_TRUE, "valid"])
5209 status_filter.insert(0, qlang.OP_AND)
5211 status_filter = None
5213 if name_filter and status_filter:
5214 return [qlang.OP_AND, name_filter, status_filter]
5218 return status_filter
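# Illustrative sketch (hypothetical fields/names): for a call such as
# _BuildFilter(["name", "valid"], ["debian-image"]) the combined filter is
# roughly
#
#   [qlang.OP_AND,
#    <name filter for "debian-image">,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]]]]
#
# because "valid" was requested explicitly, while hidden and blacklisted OSes
# are still filtered out by default.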
5220 def CheckArguments(self):
5221 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5222 self.op.output_fields, False)
5224 def ExpandNames(self):
5225 self.oq.ExpandNames(self)
5227 def Exec(self, feedback_fn):
5228 return self.oq.OldStyleQuery(self)
5231 class _ExtStorageQuery(_QueryBase):
5232 FIELDS = query.EXTSTORAGE_FIELDS
5234 def ExpandNames(self, lu):
5235 # Lock all nodes in shared mode
5236 # Temporary removal of locks, should be reverted later
5237 # TODO: reintroduce locks when they are lighter-weight
5238 lu.needed_locks = {}
5239 #self.share_locks[locking.LEVEL_NODE] = 1
5240 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5242 # The following variables interact with _QueryBase._GetNames
5244 self.wanted = self.names
5246 self.wanted = locking.ALL_SET
5248 self.do_locking = self.use_locking
5250 def DeclareLocks(self, lu, level):
5254 def _DiagnoseByProvider(rlist):
5255 """Remaps a per-node return list into a per-provider per-node dictionary
5257 @param rlist: a map with node names as keys and ExtStorage objects as values
5260 @return: a dictionary with extstorage providers as keys and as
5261 value another map, with nodes as keys and tuples of
5262 (path, status, diagnose, parameters) as values, eg::
5264 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5265 "node2": [(/srv/..., False, "missing file")]
5266 "node3": [(/srv/..., True, "", [])]
5271 # we build here the list of nodes that didn't fail the RPC (at RPC
5272 # level), so that nodes with a non-responding node daemon don't
5273 # make all ExtStorage providers invalid
5274 good_nodes = [node_name for node_name in rlist
5275 if not rlist[node_name].fail_msg]
5276 for node_name, nr in rlist.items():
5277 if nr.fail_msg or not nr.payload:
5279 for (name, path, status, diagnose, params) in nr.payload:
5280 if name not in all_es:
5281 # build a list of nodes for this os containing empty lists
5282 # for each node in node_list
5284 for nname in good_nodes:
5285 all_es[name][nname] = []
5286 # convert params from [name, help] to (name, help)
5287 params = [tuple(v) for v in params]
5288 all_es[name][node_name].append((path, status, diagnose, params))
5291 def _GetQueryData(self, lu):
5292 """Computes the list of nodes and their attributes.
5295 # Locking is not used
5296 assert not (compat.any(lu.glm.is_owned(level)
5297 for level in locking.LEVELS
5298 if level != locking.LEVEL_CLUSTER) or
5299 self.do_locking or self.use_locking)
5301 valid_nodes = [node.name
5302 for node in lu.cfg.GetAllNodesInfo().values()
5303 if not node.offline and node.vm_capable]
5304 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5308 nodegroup_list = lu.cfg.GetNodeGroupList()
5310 for (es_name, es_data) in pol.items():
5311 # For every provider compute the nodegroup validity.
5312 # To do this we need to check the validity of each node in es_data
5313 # and then construct the corresponding nodegroup dict:
5314 # { nodegroup1: status
5315 # nodegroup2: status
5318 for nodegroup in nodegroup_list:
5319 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5321 nodegroup_nodes = ndgrp.members
5322 nodegroup_name = ndgrp.name
5325 for node in nodegroup_nodes:
5326 if node in valid_nodes:
5327 if es_data[node] != []:
5328 node_status = es_data[node][0][1]
5329 node_statuses.append(node_status)
5331 node_statuses.append(False)
5333 if False in node_statuses:
5334 ndgrp_data[nodegroup_name] = False
5336 ndgrp_data[nodegroup_name] = True
5338 # Compute the provider's parameters
5340 for idx, esl in enumerate(es_data.values()):
5341 valid = bool(esl and esl[0][1])
5345 node_params = esl[0][3]
5348 parameters.update(node_params)
5350 # Filter out inconsistent values
5351 parameters.intersection_update(node_params)
5353 params = list(parameters)
5355 # Now fill all the info for this provider
5356 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5357 nodegroup_status=ndgrp_data,
5360 data[es_name] = info
5362 # Prepare data in requested order
5363 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5367 class LUExtStorageDiagnose(NoHooksLU):
5368 """Logical unit for ExtStorage diagnose/query.
5373 def CheckArguments(self):
5374 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5375 self.op.output_fields, False)
5377 def ExpandNames(self):
5378 self.eq.ExpandNames(self)
5380 def Exec(self, feedback_fn):
5381 return self.eq.OldStyleQuery(self)
5384 class LUNodeRemove(LogicalUnit):
5385 """Logical unit for removing a node.
5388 HPATH = "node-remove"
5389 HTYPE = constants.HTYPE_NODE
5391 def BuildHooksEnv(self):
5396 "OP_TARGET": self.op.node_name,
5397 "NODE_NAME": self.op.node_name,
5400 def BuildHooksNodes(self):
5401 """Build hooks nodes.
5403 This doesn't run on the target node in the pre phase as a failed
5404 node would then be impossible to remove.
5407 all_nodes = self.cfg.GetNodeList()
5409 all_nodes.remove(self.op.node_name)
5412 return (all_nodes, all_nodes)
5414 def CheckPrereq(self):
5415 """Check prerequisites.
5418 - the node exists in the configuration
5419 - it does not have primary or secondary instances
5420 - it's not the master
5422 Any errors are signaled by raising errors.OpPrereqError.
5425 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5426 node = self.cfg.GetNodeInfo(self.op.node_name)
5427 assert node is not None
5429 masternode = self.cfg.GetMasterNode()
5430 if node.name == masternode:
5431 raise errors.OpPrereqError("Node is the master node, failover to another"
5432 " node is required", errors.ECODE_INVAL)
5434 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5435 if node.name in instance.all_nodes:
5436 raise errors.OpPrereqError("Instance %s is still running on the node,"
5437 " please remove first" % instance_name,
5439 self.op.node_name = node.name
5442 def Exec(self, feedback_fn):
5443 """Removes the node from the cluster.
5447 logging.info("Stopping the node daemon and removing configs from node %s",
5450 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5452 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5455 # Promote nodes to master candidate as needed
5456 _AdjustCandidatePool(self, exceptions=[node.name])
5457 self.context.RemoveNode(node.name)
5459 # Run post hooks on the node before it's removed
5460 _RunPostHook(self, node.name)
5462 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5463 msg = result.fail_msg
5465 self.LogWarning("Errors encountered on the remote node while leaving"
5466 " the cluster: %s", msg)
5468 # Remove node from our /etc/hosts
5469 if self.cfg.GetClusterInfo().modify_etc_hosts:
5470 master_node = self.cfg.GetMasterNode()
5471 result = self.rpc.call_etc_hosts_modify(master_node,
5472 constants.ETC_HOSTS_REMOVE,
5474 result.Raise("Can't update hosts file with new host data")
5475 _RedistributeAncillaryFiles(self)
5478 class _NodeQuery(_QueryBase):
5479 FIELDS = query.NODE_FIELDS
5481 def ExpandNames(self, lu):
5482 lu.needed_locks = {}
5483 lu.share_locks = _ShareAll()
5486 self.wanted = _GetWantedNodes(lu, self.names)
5488 self.wanted = locking.ALL_SET
5490 self.do_locking = (self.use_locking and
5491 query.NQ_LIVE in self.requested_data)
5494 # If any non-static field is requested we need to lock the nodes
5495 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5496 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5498 def DeclareLocks(self, lu, level):
5501 def _GetQueryData(self, lu):
5502 """Computes the list of nodes and their attributes.
5505 all_info = lu.cfg.GetAllNodesInfo()
5507 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5509 # Gather data as requested
5510 if query.NQ_LIVE in self.requested_data:
5511 # filter out non-vm_capable nodes
5512 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5514 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5515 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5516 [lu.cfg.GetHypervisorType()], es_flags)
5517 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5518 for (name, nresult) in node_data.items()
5519 if not nresult.fail_msg and nresult.payload)
5523 if query.NQ_INST in self.requested_data:
5524 node_to_primary = dict([(name, set()) for name in nodenames])
5525 node_to_secondary = dict([(name, set()) for name in nodenames])
5527 inst_data = lu.cfg.GetAllInstancesInfo()
5529 for inst in inst_data.values():
5530 if inst.primary_node in node_to_primary:
5531 node_to_primary[inst.primary_node].add(inst.name)
5532 for secnode in inst.secondary_nodes:
5533 if secnode in node_to_secondary:
5534 node_to_secondary[secnode].add(inst.name)
5536 node_to_primary = None
5537 node_to_secondary = None
5539 if query.NQ_OOB in self.requested_data:
5540 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5541 for name, node in all_info.iteritems())
5545 if query.NQ_GROUP in self.requested_data:
5546 groups = lu.cfg.GetAllNodeGroupsInfo()
5550 return query.NodeQueryData([all_info[name] for name in nodenames],
5551 live_data, lu.cfg.GetMasterNode(),
5552 node_to_primary, node_to_secondary, groups,
5553 oob_support, lu.cfg.GetClusterInfo())
5556 class LUNodeQuery(NoHooksLU):
5557 """Logical unit for querying nodes.
5560 # pylint: disable=W0142
5563 def CheckArguments(self):
5564 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5565 self.op.output_fields, self.op.use_locking)
5567 def ExpandNames(self):
5568 self.nq.ExpandNames(self)
5570 def DeclareLocks(self, level):
5571 self.nq.DeclareLocks(self, level)
5573 def Exec(self, feedback_fn):
5574 return self.nq.OldStyleQuery(self)
5577 class LUNodeQueryvols(NoHooksLU):
5578 """Logical unit for getting volumes on node(s).
5582 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5583 _FIELDS_STATIC = utils.FieldSet("node")
5585 def CheckArguments(self):
5586 _CheckOutputFields(static=self._FIELDS_STATIC,
5587 dynamic=self._FIELDS_DYNAMIC,
5588 selected=self.op.output_fields)
5590 def ExpandNames(self):
5591 self.share_locks = _ShareAll()
5594 self.needed_locks = {
5595 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5598 self.needed_locks = {
5599 locking.LEVEL_NODE: locking.ALL_SET,
5600 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5603 def Exec(self, feedback_fn):
5604 """Computes the list of nodes and their attributes.
5607 nodenames = self.owned_locks(locking.LEVEL_NODE)
5608 volumes = self.rpc.call_node_volumes(nodenames)
5610 ilist = self.cfg.GetAllInstancesInfo()
5611 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5614 for node in nodenames:
5615 nresult = volumes[node]
5618 msg = nresult.fail_msg
5620 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5623 node_vols = sorted(nresult.payload,
5624 key=operator.itemgetter("dev"))
5626 for vol in node_vols:
5628 for field in self.op.output_fields:
5631 elif field == "phys":
5635 elif field == "name":
5637 elif field == "size":
5638 val = int(float(vol["size"]))
5639 elif field == "instance":
5640 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5642 raise errors.ParameterError(field)
5643 node_output.append(str(val))
5645 output.append(node_output)
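# For illustration only (node and volume names are made up): the "instance"
# field above is resolved by looking up (node, "<vg>/<lv name>") in the
# mapping built by _MapInstanceDisksToNodes; the key is composed as:
#
#   >>> node, vol = "node1", {"vg": "xenvg", "name": "disk0"}
#   >>> (node, vol["vg"] + "/" + vol["name"])
#   ('node1', 'xenvg/disk0')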
5650 class LUNodeQueryStorage(NoHooksLU):
5651 """Logical unit for getting information on storage units on node(s).
5654 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5657 def CheckArguments(self):
5658 _CheckOutputFields(static=self._FIELDS_STATIC,
5659 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5660 selected=self.op.output_fields)
5662 def ExpandNames(self):
5663 self.share_locks = _ShareAll()
5666 self.needed_locks = {
5667 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5670 self.needed_locks = {
5671 locking.LEVEL_NODE: locking.ALL_SET,
5672 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5675 def Exec(self, feedback_fn):
5676 """Computes the list of nodes and their attributes.
5679 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5681 # Always get name to sort by
5682 if constants.SF_NAME in self.op.output_fields:
5683 fields = self.op.output_fields[:]
5685 fields = [constants.SF_NAME] + self.op.output_fields
5687 # Never ask for node or type as it's only known to the LU
5688 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5689 while extra in fields:
5690 fields.remove(extra)
5692 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5693 name_idx = field_idx[constants.SF_NAME]
5695 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5696 data = self.rpc.call_storage_list(self.nodes,
5697 self.op.storage_type, st_args,
5698 self.op.name, fields)
5702 for node in utils.NiceSort(self.nodes):
5703 nresult = data[node]
5707 msg = nresult.fail_msg
5709 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5712 rows = dict([(row[name_idx], row) for row in nresult.payload])
5714 for name in utils.NiceSort(rows.keys()):
5719 for field in self.op.output_fields:
5720 if field == constants.SF_NODE:
5722 elif field == constants.SF_TYPE:
5723 val = self.op.storage_type
5724 elif field in field_idx:
5725 val = row[field_idx[field]]
5727 raise errors.ParameterError(field)
5736 class _InstanceQuery(_QueryBase):
5737 FIELDS = query.INSTANCE_FIELDS
5739 def ExpandNames(self, lu):
5740 lu.needed_locks = {}
5741 lu.share_locks = _ShareAll()
5744 self.wanted = _GetWantedInstances(lu, self.names)
5746 self.wanted = locking.ALL_SET
5748 self.do_locking = (self.use_locking and
5749 query.IQ_LIVE in self.requested_data)
5751 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5752 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5753 lu.needed_locks[locking.LEVEL_NODE] = []
5754 lu.needed_locks[locking.LEVEL_NETWORK] = []
5755 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5757 self.do_grouplocks = (self.do_locking and
5758 query.IQ_NODES in self.requested_data)
5760 def DeclareLocks(self, lu, level):
5762 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5763 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5765 # Lock all groups used by instances optimistically; this requires going
5766 # via the node before it's locked, requiring verification later on
5767 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5769 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5770 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5771 elif level == locking.LEVEL_NODE:
5772 lu._LockInstancesNodes() # pylint: disable=W0212
5774 elif level == locking.LEVEL_NETWORK:
5775 lu.needed_locks[locking.LEVEL_NETWORK] = \
5777 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5778 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
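# For illustration only (UUIDs below are made up): the optimistic group
# locking above flattens the group UUIDs of every locked instance into one
# set, conceptually:
#
#   >>> inst_groups = {"inst1": ["g-uuid-a"], "inst2": ["g-uuid-a", "g-uuid-b"]}
#   >>> wanted = frozenset(g for groups in inst_groups.values() for g in groups)
#   >>> sorted(wanted)
#   ['g-uuid-a', 'g-uuid-b']
#
# The ownership is then re-verified in _CheckGroupLocks, because the
# instance/group assignment may have changed before the locks were granted.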
5781 def _CheckGroupLocks(lu):
5782 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5783 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5785 # Check if node groups for locked instances are still correct
5786 for instance_name in owned_instances:
5787 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5789 def _GetQueryData(self, lu):
5790 """Computes the list of instances and their attributes.
5793 if self.do_grouplocks:
5794 self._CheckGroupLocks(lu)
5796 cluster = lu.cfg.GetClusterInfo()
5797 all_info = lu.cfg.GetAllInstancesInfo()
5799 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5801 instance_list = [all_info[name] for name in instance_names]
5802 nodes = frozenset(itertools.chain(*(inst.all_nodes
5803 for inst in instance_list)))
5804 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5807 wrongnode_inst = set()
5809 # Gather data as requested
5810 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5812 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5814 result = node_data[name]
5816 # offline nodes will be in both lists
5817 assert result.fail_msg
5818 offline_nodes.append(name)
5820 bad_nodes.append(name)
5821 elif result.payload:
5822 for inst in result.payload:
5823 if inst in all_info:
5824 if all_info[inst].primary_node == name:
5825 live_data.update(result.payload)
5827 wrongnode_inst.add(inst)
5829 # orphan instance; we don't list it here as we don't
5830 # handle this case yet in the output of instance listing
5831 logging.warning("Orphan instance '%s' found on node %s",
5833 # else no instance is alive
5837 if query.IQ_DISKUSAGE in self.requested_data:
5838 gmi = ganeti.masterd.instance
5839 disk_usage = dict((inst.name,
5840 gmi.ComputeDiskSize(inst.disk_template,
5841 [{constants.IDISK_SIZE: disk.size}
5842 for disk in inst.disks]))
5843 for inst in instance_list)
5847 if query.IQ_CONSOLE in self.requested_data:
5849 for inst in instance_list:
5850 if inst.name in live_data:
5851 # Instance is running
5852 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5854 consinfo[inst.name] = None
5855 assert set(consinfo.keys()) == set(instance_names)
5859 if query.IQ_NODES in self.requested_data:
5860 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5862 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5863 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5864 for uuid in set(map(operator.attrgetter("group"),
5870 if query.IQ_NETWORKS in self.requested_data:
5871 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5872 for i in instance_list))
5873 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5877 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5878 disk_usage, offline_nodes, bad_nodes,
5879 live_data, wrongnode_inst, consinfo,
5880 nodes, groups, networks)
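# For illustration only (names and sizes are made up, and per-template
# overhead is ignored): the IQ_DISKUSAGE branch above maps each instance
# name to its computed disk usage; for a simple template this is roughly the
# sum of the disk sizes:
#
#   >>> disk_sizes = {"inst1": [1024, 512], "inst2": [2048]}
#   >>> usage = dict((name, sum(sizes)) for (name, sizes) in disk_sizes.items())
#   >>> usage == {"inst1": 1536, "inst2": 2048}
#   True
#
# The real computation is delegated to ganeti.masterd.instance.ComputeDiskSize,
# which also accounts for template-specific overhead such as DRBD metadata.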
5883 class LUQuery(NoHooksLU):
5884 """Query for resources/items of a certain kind.
5887 # pylint: disable=W0142
5890 def CheckArguments(self):
5891 qcls = _GetQueryImplementation(self.op.what)
5893 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5895 def ExpandNames(self):
5896 self.impl.ExpandNames(self)
5898 def DeclareLocks(self, level):
5899 self.impl.DeclareLocks(self, level)
5901 def Exec(self, feedback_fn):
5902 return self.impl.NewStyleQuery(self)
5905 class LUQueryFields(NoHooksLU):
5906 """Query for resources/items of a certain kind.
5909 # pylint: disable=W0142
5912 def CheckArguments(self):
5913 self.qcls = _GetQueryImplementation(self.op.what)
5915 def ExpandNames(self):
5916 self.needed_locks = {}
5918 def Exec(self, feedback_fn):
5919 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
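# For illustration only (the mapping below is a simplified stand-in, not the
# real table): LUQuery and LUQueryFields both dispatch on self.op.what via
# _GetQueryImplementation, which conceptually is a lookup from resource kind
# to the _QueryBase subclass handling it:
#
#   >>> impls = {"node": "_NodeQuery", "instance": "_InstanceQuery"}
#   >>> impls["instance"]
#   '_InstanceQuery'
#
# The real lookup is keyed by constants.QR_* values and returns classes;
# unknown kinds are rejected with an OpPrereqError.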
5922 class LUNodeModifyStorage(NoHooksLU):
5923 """Logical unit for modifying a storage volume on a node.
5928 def CheckArguments(self):
5929 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5931 storage_type = self.op.storage_type
5934 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5936 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5937 " modified" % storage_type,
5940 diff = set(self.op.changes.keys()) - modifiable
5942 raise errors.OpPrereqError("The following fields can not be modified for"
5943 " storage units of type '%s': %r" %
5944 (storage_type, list(diff)),
5947 def ExpandNames(self):
5948 self.needed_locks = {
5949 locking.LEVEL_NODE: self.op.node_name,
5952 def Exec(self, feedback_fn):
5953 """Computes the list of nodes and their attributes.
5956 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5957 result = self.rpc.call_storage_modify(self.op.node_name,
5958 self.op.storage_type, st_args,
5959 self.op.name, self.op.changes)
5960 result.Raise("Failed to modify storage unit '%s' on %s" %
5961 (self.op.name, self.op.node_name))
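# For illustration only (field names are made up): the CheckArguments check
# above rejects any change key outside MODIFIABLE_STORAGE_FIELDS with a plain
# set difference:
#
#   >>> modifiable = frozenset(["allocatable"])
#   >>> changes = {"allocatable": False, "size": 10}
#   >>> sorted(set(changes.keys()) - modifiable)
#   ['size']
#
# A non-empty difference aborts the opcode with an OpPrereqError that lists
# the offending fields.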
5964 class LUNodeAdd(LogicalUnit):
5965 """Logical unit for adding node to the cluster.
5969 HTYPE = constants.HTYPE_NODE
5970 _NFLAGS = ["master_capable", "vm_capable"]
5972 def CheckArguments(self):
5973 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5974 # validate/normalize the node name
5975 self.hostname = netutils.GetHostname(name=self.op.node_name,
5976 family=self.primary_ip_family)
5977 self.op.node_name = self.hostname.name
5979 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5980 raise errors.OpPrereqError("Cannot readd the master node",
5983 if self.op.readd and self.op.group:
5984 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5985 " being readded", errors.ECODE_INVAL)
5987 def BuildHooksEnv(self):
5990 This will run on all nodes before, and on all nodes + the new node after.
5994 "OP_TARGET": self.op.node_name,
5995 "NODE_NAME": self.op.node_name,
5996 "NODE_PIP": self.op.primary_ip,
5997 "NODE_SIP": self.op.secondary_ip,
5998 "MASTER_CAPABLE": str(self.op.master_capable),
5999 "VM_CAPABLE": str(self.op.vm_capable),
6002 def BuildHooksNodes(self):
6003 """Build hooks nodes.
6006 # Exclude added node
6007 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6008 post_nodes = pre_nodes + [self.op.node_name, ]
6010 return (pre_nodes, post_nodes)
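# For illustration only (node names are made up, ordering normalized with
# sorted()): the hook node lists above exclude the new node from the
# pre-hooks and include it in the post-hooks:
#
#   >>> node_list, new = ["node1", "node2", "node3"], "node3"
#   >>> pre = sorted(set(node_list) - set([new]))
#   >>> pre, pre + [new]
#   (['node1', 'node2'], ['node1', 'node2', 'node3'])
#
# Removing the node explicitly matters for the re-add case, where it is
# already part of the configuration.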
6012 def CheckPrereq(self):
6013 """Check prerequisites.
6016 - the new node is not already in the config
6018 - its parameters (single/dual homed) match the cluster
6020 Any errors are signaled by raising errors.OpPrereqError.
6024 hostname = self.hostname
6025 node = hostname.name
6026 primary_ip = self.op.primary_ip = hostname.ip
6027 if self.op.secondary_ip is None:
6028 if self.primary_ip_family == netutils.IP6Address.family:
6029 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6030 " IPv4 address must be given as secondary",
6032 self.op.secondary_ip = primary_ip
6034 secondary_ip = self.op.secondary_ip
6035 if not netutils.IP4Address.IsValid(secondary_ip):
6036 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6037 " address" % secondary_ip, errors.ECODE_INVAL)
6039 node_list = cfg.GetNodeList()
6040 if not self.op.readd and node in node_list:
6041 raise errors.OpPrereqError("Node %s is already in the configuration" %
6042 node, errors.ECODE_EXISTS)
6043 elif self.op.readd and node not in node_list:
6044 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6047 self.changed_primary_ip = False
6049 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6050 if self.op.readd and node == existing_node_name:
6051 if existing_node.secondary_ip != secondary_ip:
6052 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6053 " address configuration as before",
6055 if existing_node.primary_ip != primary_ip:
6056 self.changed_primary_ip = True
6060 if (existing_node.primary_ip == primary_ip or
6061 existing_node.secondary_ip == primary_ip or
6062 existing_node.primary_ip == secondary_ip or
6063 existing_node.secondary_ip == secondary_ip):
6064 raise errors.OpPrereqError("New node ip address(es) conflict with"
6065 " existing node %s" % existing_node.name,
6066 errors.ECODE_NOTUNIQUE)
6068 # After this 'if' block, None is no longer a valid value for the
6069 # _capable op attributes
6071 old_node = self.cfg.GetNodeInfo(node)
6072 assert old_node is not None, "Can't retrieve locked node %s" % node
6073 for attr in self._NFLAGS:
6074 if getattr(self.op, attr) is None:
6075 setattr(self.op, attr, getattr(old_node, attr))
6077 for attr in self._NFLAGS:
6078 if getattr(self.op, attr) is None:
6079 setattr(self.op, attr, True)
6081 if self.op.readd and not self.op.vm_capable:
6082 pri, sec = cfg.GetNodeInstances(node)
6084 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6085 " flag set to false, but it already holds"
6086 " instances" % node,
6089 # check that the type of the node (single versus dual homed) is the
6090 # same as for the master
6091 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6092 master_singlehomed = myself.secondary_ip == myself.primary_ip
6093 newbie_singlehomed = secondary_ip == primary_ip
6094 if master_singlehomed != newbie_singlehomed:
6095 if master_singlehomed:
6096 raise errors.OpPrereqError("The master has no secondary ip but the"
6097 " new node has one",
6100 raise errors.OpPrereqError("The master has a secondary ip but the"
6101 " new node doesn't have one",
6104 # checks reachability
6105 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6106 raise errors.OpPrereqError("Node not reachable by ping",
6107 errors.ECODE_ENVIRON)
6109 if not newbie_singlehomed:
6110 # check reachability from my secondary ip to newbie's secondary ip
6111 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6112 source=myself.secondary_ip):
6113 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6114 " based ping to node daemon port",
6115 errors.ECODE_ENVIRON)
6122 if self.op.master_capable:
6123 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6125 self.master_candidate = False
6128 self.new_node = old_node
6130 node_group = cfg.LookupNodeGroup(self.op.group)
6131 self.new_node = objects.Node(name=node,
6132 primary_ip=primary_ip,
6133 secondary_ip=secondary_ip,
6134 master_candidate=self.master_candidate,
6135 offline=False, drained=False,
6136 group=node_group, ndparams={})
6138 if self.op.ndparams:
6139 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6140 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6141 "node", "cluster or group")
6143 if self.op.hv_state:
6144 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6146 if self.op.disk_state:
6147 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6149 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6150 # it a property on the base class.
6151 rpcrunner = rpc.DnsOnlyRunner()
6152 result = rpcrunner.call_version([node])[node]
6153 result.Raise("Can't get version information from node %s" % node)
6154 if constants.PROTOCOL_VERSION == result.payload:
6155 logging.info("Communication to node %s fine, sw version %s match",
6156 node, result.payload)
6158 raise errors.OpPrereqError("Version mismatch master version %s,"
6159 " node version %s" %
6160 (constants.PROTOCOL_VERSION, result.payload),
6161 errors.ECODE_ENVIRON)
6163 vg_name = cfg.GetVGName()
6164 if vg_name is not None:
6165 vparams = {constants.NV_PVLIST: [vg_name]}
6166 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6167 cname = self.cfg.GetClusterName()
6168 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6169 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6171 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6172 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6174 def Exec(self, feedback_fn):
6175 """Adds the new node to the cluster.
6178 new_node = self.new_node
6179 node = new_node.name
6181 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6184 # We are adding a new node, so we assume it's powered
6185 new_node.powered = True
6187 # for re-adds, reset the offline/drained/master-candidate flags;
6188 # we need to reset here, otherwise offline would prevent RPC calls
6189 # later in the procedure; this also means that if the re-add
6190 # fails, we are left with a non-offlined, broken node
6192 new_node.drained = new_node.offline = False # pylint: disable=W0201
6193 self.LogInfo("Readding a node, the offline/drained flags were reset")
6194 # if we demote the node, we do cleanup later in the procedure
6195 new_node.master_candidate = self.master_candidate
6196 if self.changed_primary_ip:
6197 new_node.primary_ip = self.op.primary_ip
6199 # copy the master/vm_capable flags
6200 for attr in self._NFLAGS:
6201 setattr(new_node, attr, getattr(self.op, attr))
6203 # notify the user about any possible mc promotion
6204 if new_node.master_candidate:
6205 self.LogInfo("Node will be a master candidate")
6207 if self.op.ndparams:
6208 new_node.ndparams = self.op.ndparams
6210 new_node.ndparams = {}
6212 if self.op.hv_state:
6213 new_node.hv_state_static = self.new_hv_state
6215 if self.op.disk_state:
6216 new_node.disk_state_static = self.new_disk_state
6218 # Add node to our /etc/hosts, and add key to known_hosts
6219 if self.cfg.GetClusterInfo().modify_etc_hosts:
6220 master_node = self.cfg.GetMasterNode()
6221 result = self.rpc.call_etc_hosts_modify(master_node,
6222 constants.ETC_HOSTS_ADD,
6225 result.Raise("Can't update hosts file with new host data")
6227 if new_node.secondary_ip != new_node.primary_ip:
6228 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6231 node_verify_list = [self.cfg.GetMasterNode()]
6232 node_verify_param = {
6233 constants.NV_NODELIST: ([node], {}),
6234 # TODO: do a node-net-test as well?
6237 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6238 self.cfg.GetClusterName())
6239 for verifier in node_verify_list:
6240 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6241 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6243 for failed in nl_payload:
6244 feedback_fn("ssh/hostname verification failed"
6245 " (checking from %s): %s" %
6246 (verifier, nl_payload[failed]))
6247 raise errors.OpExecError("ssh/hostname verification failed")
6250 _RedistributeAncillaryFiles(self)
6251 self.context.ReaddNode(new_node)
6252 # make sure we redistribute the config
6253 self.cfg.Update(new_node, feedback_fn)
6254 # and make sure the new node will not have old files around
6255 if not new_node.master_candidate:
6256 result = self.rpc.call_node_demote_from_mc(new_node.name)
6257 msg = result.fail_msg
6259 self.LogWarning("Node failed to demote itself from master"
6260 " candidate status: %s" % msg)
6262 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6263 additional_vm=self.op.vm_capable)
6264 self.context.AddNode(new_node, self.proc.GetECId())
6267 class LUNodeSetParams(LogicalUnit):
6268 """Modifies the parameters of a node.
6270 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6271 to the node role (as _ROLE_*)
6272 @cvar _R2F: a dictionary from node role to tuples of flags
6273 @cvar _FLAGS: a list of attribute names corresponding to the flags
6276 HPATH = "node-modify"
6277 HTYPE = constants.HTYPE_NODE
6279 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6281 (True, False, False): _ROLE_CANDIDATE,
6282 (False, True, False): _ROLE_DRAINED,
6283 (False, False, True): _ROLE_OFFLINE,
6284 (False, False, False): _ROLE_REGULAR,
6286 _R2F = dict((v, k) for k, v in _F2R.items())
6287 _FLAGS = ["master_candidate", "drained", "offline"]
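# For illustration only: _F2R maps the (master_candidate, drained, offline)
# flag tuple to a single role and _R2F is its inverse, so for example a
# drained node resolves as:
#
#   >>> _F2R[(False, True, False)] == _ROLE_DRAINED
#   True
#   >>> _R2F[_ROLE_DRAINED]
#   (False, True, False)
#
# Only the four combinations listed in _F2R are valid; anything else trips
# the assertion on old_flags in CheckPrereq.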
6289 def CheckArguments(self):
6290 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6291 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6292 self.op.master_capable, self.op.vm_capable,
6293 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6295 if all_mods.count(None) == len(all_mods):
6296 raise errors.OpPrereqError("Please pass at least one modification",
6298 if all_mods.count(True) > 1:
6299 raise errors.OpPrereqError("Can't set the node into more than one"
6300 " state at the same time",
6303 # Boolean value that tells us whether we might be demoting from MC
6304 self.might_demote = (self.op.master_candidate is False or
6305 self.op.offline is True or
6306 self.op.drained is True or
6307 self.op.master_capable is False)
6309 if self.op.secondary_ip:
6310 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6311 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6312 " address" % self.op.secondary_ip,
6315 self.lock_all = self.op.auto_promote and self.might_demote
6316 self.lock_instances = self.op.secondary_ip is not None
6318 def _InstanceFilter(self, instance):
6319 """Filter for getting affected instances.
6322 return (instance.disk_template in constants.DTS_INT_MIRROR and
6323 self.op.node_name in instance.all_nodes)
6325 def ExpandNames(self):
6327 self.needed_locks = {
6328 locking.LEVEL_NODE: locking.ALL_SET,
6330 # Block allocations when all nodes are locked
6331 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6334 self.needed_locks = {
6335 locking.LEVEL_NODE: self.op.node_name,
6338 # Since modifying a node can have severe effects on currently running
6339 # operations the resource lock is at least acquired in shared mode
6340 self.needed_locks[locking.LEVEL_NODE_RES] = \
6341 self.needed_locks[locking.LEVEL_NODE]
6343 # Get all locks except nodes in shared mode; they are not used for anything
6344 # but read-only access
6345 self.share_locks = _ShareAll()
6346 self.share_locks[locking.LEVEL_NODE] = 0
6347 self.share_locks[locking.LEVEL_NODE_RES] = 0
6348 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6350 if self.lock_instances:
6351 self.needed_locks[locking.LEVEL_INSTANCE] = \
6352 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6354 def BuildHooksEnv(self):
6357 This runs on the master node.
6361 "OP_TARGET": self.op.node_name,
6362 "MASTER_CANDIDATE": str(self.op.master_candidate),
6363 "OFFLINE": str(self.op.offline),
6364 "DRAINED": str(self.op.drained),
6365 "MASTER_CAPABLE": str(self.op.master_capable),
6366 "VM_CAPABLE": str(self.op.vm_capable),
6369 def BuildHooksNodes(self):
6370 """Build hooks nodes.
6373 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6376 def CheckPrereq(self):
6377 """Check prerequisites.
6379 This only checks the instance list against the existing names.
6382 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6384 if self.lock_instances:
6385 affected_instances = \
6386 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6388 # Verify instance locks
6389 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6390 wanted_instances = frozenset(affected_instances.keys())
6391 if wanted_instances - owned_instances:
6392 raise errors.OpPrereqError("Instances affected by changing node %s's"
6393 " secondary IP address have changed since"
6394 " locks were acquired, wanted '%s', have"
6395 " '%s'; retry the operation" %
6397 utils.CommaJoin(wanted_instances),
6398 utils.CommaJoin(owned_instances)),
6401 affected_instances = None
6403 if (self.op.master_candidate is not None or
6404 self.op.drained is not None or
6405 self.op.offline is not None):
6406 # we can't change the master's node flags
6407 if self.op.node_name == self.cfg.GetMasterNode():
6408 raise errors.OpPrereqError("The master role can be changed"
6409 " only via master-failover",
6412 if self.op.master_candidate and not node.master_capable:
6413 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6414 " it a master candidate" % node.name,
6417 if self.op.vm_capable is False:
6418 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6420 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6421 " the vm_capable flag" % node.name,
6424 if node.master_candidate and self.might_demote and not self.lock_all:
6425 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6426 # check if after removing the current node, we're missing master
6428 (mc_remaining, mc_should, _) = \
6429 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6430 if mc_remaining < mc_should:
6431 raise errors.OpPrereqError("Not enough master candidates, please"
6432 " pass auto promote option to allow"
6433 " promotion (--auto-promote or RAPI"
6434 " auto_promote=True)", errors.ECODE_STATE)
6436 self.old_flags = old_flags = (node.master_candidate,
6437 node.drained, node.offline)
6438 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6439 self.old_role = old_role = self._F2R[old_flags]
6441 # Check for ineffective changes
6442 for attr in self._FLAGS:
6443 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6444 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6445 setattr(self.op, attr, None)
6447 # Past this point, any flag change to False means a transition
6448 # away from the respective state, as only real changes are kept
6450 # TODO: We might query the real power state if it supports OOB
6451 if _SupportsOob(self.cfg, node):
6452 if self.op.offline is False and not (node.powered or
6453 self.op.powered is True):
6454 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6455 " offline status can be reset") %
6456 self.op.node_name, errors.ECODE_STATE)
6457 elif self.op.powered is not None:
6458 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6459 " as it does not support out-of-band"
6460 " handling") % self.op.node_name,
6463 # If we're being deofflined/drained, we'll MC ourself if needed
6464 if (self.op.drained is False or self.op.offline is False or
6465 (self.op.master_capable and not node.master_capable)):
6466 if _DecideSelfPromotion(self):
6467 self.op.master_candidate = True
6468 self.LogInfo("Auto-promoting node to master candidate")
6470 # If we're no longer master capable, we'll demote ourselves from MC
6471 if self.op.master_capable is False and node.master_candidate:
6472 self.LogInfo("Demoting from master candidate")
6473 self.op.master_candidate = False
6476 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6477 if self.op.master_candidate:
6478 new_role = self._ROLE_CANDIDATE
6479 elif self.op.drained:
6480 new_role = self._ROLE_DRAINED
6481 elif self.op.offline:
6482 new_role = self._ROLE_OFFLINE
6483 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6484 # False is still in new flags, which means we're un-setting (the
6486 new_role = self._ROLE_REGULAR
6487 else: # no new flags, nothing, keep old role
6490 self.new_role = new_role
6492 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6493 # Trying to transition out of offline status
6494 result = self.rpc.call_version([node.name])[node.name]
6496 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6497 " to report its version: %s" %
6498 (node.name, result.fail_msg),
6501 self.LogWarning("Transitioning node from offline to online state"
6502 " without using re-add. Please make sure the node"
6505 # When changing the secondary ip, verify if this is a single-homed to
6506 # multi-homed transition or vice versa, and apply the relevant restrictions.
6508 if self.op.secondary_ip:
6509 # Ok even without locking, because this can't be changed by any LU
6510 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6511 master_singlehomed = master.secondary_ip == master.primary_ip
6512 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6513 if self.op.force and node.name == master.name:
6514 self.LogWarning("Transitioning from single-homed to multi-homed"
6515 " cluster; all nodes will require a secondary IP"
6518 raise errors.OpPrereqError("Changing the secondary ip on a"
6519 " single-homed cluster requires the"
6520 " --force option to be passed, and the"
6521 " target node to be the master",
6523 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6524 if self.op.force and node.name == master.name:
6525 self.LogWarning("Transitioning from multi-homed to single-homed"
6526 " cluster; secondary IP addresses will have to be"
6529 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6530 " same as the primary IP on a multi-homed"
6531 " cluster, unless the --force option is"
6532 " passed, and the target node is the"
6533 " master", errors.ECODE_INVAL)
6535 assert not (frozenset(affected_instances) -
6536 self.owned_locks(locking.LEVEL_INSTANCE))
6539 if affected_instances:
6540 msg = ("Cannot change secondary IP address: offline node has"
6541 " instances (%s) configured to use it" %
6542 utils.CommaJoin(affected_instances.keys()))
6543 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6545 # On online nodes, check that no instances are running, and that
6546 # the node has the new ip and we can reach it.
6547 for instance in affected_instances.values():
6548 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6549 msg="cannot change secondary ip")
6551 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6552 if master.name != node.name:
6553 # check reachability from master secondary ip to new secondary ip
6554 if not netutils.TcpPing(self.op.secondary_ip,
6555 constants.DEFAULT_NODED_PORT,
6556 source=master.secondary_ip):
6557 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6558 " based ping to node daemon port",
6559 errors.ECODE_ENVIRON)
6561 if self.op.ndparams:
6562 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6563 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6564 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6565 "node", "cluster or group")
6566 self.new_ndparams = new_ndparams
6568 if self.op.hv_state:
6569 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6570 self.node.hv_state_static)
6572 if self.op.disk_state:
6573 self.new_disk_state = \
6574 _MergeAndVerifyDiskState(self.op.disk_state,
6575 self.node.disk_state_static)
6577 def Exec(self, feedback_fn):
6582 old_role = self.old_role
6583 new_role = self.new_role
6587 if self.op.ndparams:
6588 node.ndparams = self.new_ndparams
6590 if self.op.powered is not None:
6591 node.powered = self.op.powered
6593 if self.op.hv_state:
6594 node.hv_state_static = self.new_hv_state
6596 if self.op.disk_state:
6597 node.disk_state_static = self.new_disk_state
6599 for attr in ["master_capable", "vm_capable"]:
6600 val = getattr(self.op, attr)
6602 setattr(node, attr, val)
6603 result.append((attr, str(val)))
6605 if new_role != old_role:
6606 # Tell the node to demote itself, if no longer MC and not offline
6607 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6608 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6610 self.LogWarning("Node failed to demote itself: %s", msg)
6612 new_flags = self._R2F[new_role]
6613 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6615 result.append((desc, str(nf)))
6616 (node.master_candidate, node.drained, node.offline) = new_flags
6618 # we locked all nodes, we adjust the CP before updating this node
6620 _AdjustCandidatePool(self, [node.name])
6622 if self.op.secondary_ip:
6623 node.secondary_ip = self.op.secondary_ip
6624 result.append(("secondary_ip", self.op.secondary_ip))
6626 # this will trigger configuration file update, if needed
6627 self.cfg.Update(node, feedback_fn)
6629 # this will trigger job queue propagation or cleanup if the mc
6631 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6632 self.context.ReaddNode(node)
6637 class LUNodePowercycle(NoHooksLU):
6638 """Powercycles a node.
6643 def CheckArguments(self):
6644 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6645 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6646 raise errors.OpPrereqError("The node is the master and the force"
6647 " parameter was not set",
6650 def ExpandNames(self):
6651 """Locking for PowercycleNode.
6653 This is a last-resort option and shouldn't block on other
6654 jobs. Therefore, we grab no locks.
6657 self.needed_locks = {}
6659 def Exec(self, feedback_fn):
6663 result = self.rpc.call_node_powercycle(self.op.node_name,
6664 self.cfg.GetHypervisorType())
6665 result.Raise("Failed to schedule the reboot")
6666 return result.payload
6669 class LUClusterQuery(NoHooksLU):
6670 """Query cluster configuration.
6675 def ExpandNames(self):
6676 self.needed_locks = {}
6678 def Exec(self, feedback_fn):
6679 """Return cluster config.
6682 cluster = self.cfg.GetClusterInfo()
6685 # Filter just for enabled hypervisors
6686 for os_name, hv_dict in cluster.os_hvp.items():
6687 os_hvp[os_name] = {}
6688 for hv_name, hv_params in hv_dict.items():
6689 if hv_name in cluster.enabled_hypervisors:
6690 os_hvp[os_name][hv_name] = hv_params
6692 # Convert ip_family to ip_version
6693 primary_ip_version = constants.IP4_VERSION
6694 if cluster.primary_ip_family == netutils.IP6Address.family:
6695 primary_ip_version = constants.IP6_VERSION
6698 "software_version": constants.RELEASE_VERSION,
6699 "protocol_version": constants.PROTOCOL_VERSION,
6700 "config_version": constants.CONFIG_VERSION,
6701 "os_api_version": max(constants.OS_API_VERSIONS),
6702 "export_version": constants.EXPORT_VERSION,
6703 "architecture": runtime.GetArchInfo(),
6704 "name": cluster.cluster_name,
6705 "master": cluster.master_node,
6706 "default_hypervisor": cluster.primary_hypervisor,
6707 "enabled_hypervisors": cluster.enabled_hypervisors,
6708 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6709 for hypervisor_name in cluster.enabled_hypervisors]),
6711 "beparams": cluster.beparams,
6712 "osparams": cluster.osparams,
6713 "ipolicy": cluster.ipolicy,
6714 "nicparams": cluster.nicparams,
6715 "ndparams": cluster.ndparams,
6716 "diskparams": cluster.diskparams,
6717 "candidate_pool_size": cluster.candidate_pool_size,
6718 "master_netdev": cluster.master_netdev,
6719 "master_netmask": cluster.master_netmask,
6720 "use_external_mip_script": cluster.use_external_mip_script,
6721 "volume_group_name": cluster.volume_group_name,
6722 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6723 "file_storage_dir": cluster.file_storage_dir,
6724 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6725 "maintain_node_health": cluster.maintain_node_health,
6726 "ctime": cluster.ctime,
6727 "mtime": cluster.mtime,
6728 "uuid": cluster.uuid,
6729 "tags": list(cluster.GetTags()),
6730 "uid_pool": cluster.uid_pool,
6731 "default_iallocator": cluster.default_iallocator,
6732 "reserved_lvs": cluster.reserved_lvs,
6733 "primary_ip_version": primary_ip_version,
6734 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6735 "hidden_os": cluster.hidden_os,
6736 "blacklisted_os": cluster.blacklisted_os,
6742 class LUClusterConfigQuery(NoHooksLU):
6743 """Return configuration values.
6748 def CheckArguments(self):
6749 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6751 def ExpandNames(self):
6752 self.cq.ExpandNames(self)
6754 def DeclareLocks(self, level):
6755 self.cq.DeclareLocks(self, level)
6757 def Exec(self, feedback_fn):
6758 result = self.cq.OldStyleQuery(self)
6760 assert len(result) == 1
6765 class _ClusterQuery(_QueryBase):
6766 FIELDS = query.CLUSTER_FIELDS
6768 #: Do not sort (there is only one item)
6771 def ExpandNames(self, lu):
6772 lu.needed_locks = {}
6774 # The following variables interact with _QueryBase._GetNames
6775 self.wanted = locking.ALL_SET
6776 self.do_locking = self.use_locking
6779 raise errors.OpPrereqError("Can not use locking for cluster queries",
6782 def DeclareLocks(self, lu, level):
6785 def _GetQueryData(self, lu):
6786 """Computes the list of nodes and their attributes.
6789 # Locking is not used
6790 assert not (compat.any(lu.glm.is_owned(level)
6791 for level in locking.LEVELS
6792 if level != locking.LEVEL_CLUSTER) or
6793 self.do_locking or self.use_locking)
6795 if query.CQ_CONFIG in self.requested_data:
6796 cluster = lu.cfg.GetClusterInfo()
6798 cluster = NotImplemented
6800 if query.CQ_QUEUE_DRAINED in self.requested_data:
6801 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6803 drain_flag = NotImplemented
6805 if query.CQ_WATCHER_PAUSE in self.requested_data:
6806 master_name = lu.cfg.GetMasterNode()
6808 result = lu.rpc.call_get_watcher_pause(master_name)
6809 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6812 watcher_pause = result.payload
6814 watcher_pause = NotImplemented
6816 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
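# For illustration only (the strings below are simplified stand-ins for the
# query.CQ_* constants): data sources that were not requested are reported as
# NotImplemented, so the query layer can tell "not gathered" apart from a
# real value:
#
#   >>> requested = set(["config"])
#   >>> drain_flag = True if "queue_drained" in requested else NotImplemented
#   >>> drain_flag
#   NotImplemented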
6819 class LUInstanceActivateDisks(NoHooksLU):
6820 """Bring up an instance's disks.
6825 def ExpandNames(self):
6826 self._ExpandAndLockInstance()
6827 self.needed_locks[locking.LEVEL_NODE] = []
6828 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6830 def DeclareLocks(self, level):
6831 if level == locking.LEVEL_NODE:
6832 self._LockInstancesNodes()
6834 def CheckPrereq(self):
6835 """Check prerequisites.
6837 This checks that the instance is in the cluster.
6840 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841 assert self.instance is not None, \
6842 "Cannot retrieve locked instance %s" % self.op.instance_name
6843 _CheckNodeOnline(self, self.instance.primary_node)
6845 def Exec(self, feedback_fn):
6846 """Activate the disks.
6849 disks_ok, disks_info = \
6850 _AssembleInstanceDisks(self, self.instance,
6851 ignore_size=self.op.ignore_size)
6853 raise errors.OpExecError("Cannot activate block devices")
6855 if self.op.wait_for_sync:
6856 if not _WaitForSync(self, self.instance):
6857 raise errors.OpExecError("Some disks of the instance are degraded!")
6862 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6864 """Prepare the block devices for an instance.
6866 This sets up the block devices on all nodes.
6868 @type lu: L{LogicalUnit}
6869 @param lu: the logical unit on whose behalf we execute
6870 @type instance: L{objects.Instance}
6871 @param instance: the instance for whose disks we assemble
6872 @type disks: list of L{objects.Disk} or None
6873 @param disks: which disks to assemble (or all, if None)
6874 @type ignore_secondaries: boolean
6875 @param ignore_secondaries: if true, errors on secondary nodes
6876 won't result in an error return from the function
6877 @type ignore_size: boolean
6878 @param ignore_size: if true, the current known size of the disk
6879 will not be used during the disk activation, useful for cases
6880 when the size is wrong
6881 @return: False if the operation failed, otherwise a list of
6882 (host, instance_visible_name, node_visible_name)
6883 with the mapping from node devices to instance devices
6888 iname = instance.name
6889 disks = _ExpandCheckDisks(instance, disks)
6891 # With the two passes mechanism we try to reduce the window of
6892 # opportunity for the race condition of switching DRBD to primary
6893 # before handshaking occurred, but we do not eliminate it
6895 # The proper fix would be to wait (with some limits) until the
6896 # connection has been made and drbd transitions from WFConnection
6897 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6900 # 1st pass, assemble on all nodes in secondary mode
6901 for idx, inst_disk in enumerate(disks):
6902 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6904 node_disk = node_disk.Copy()
6905 node_disk.UnsetSize()
6906 lu.cfg.SetDiskID(node_disk, node)
6907 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6909 msg = result.fail_msg
6911 is_offline_secondary = (node in instance.secondary_nodes and
6913 lu.LogWarning("Could not prepare block device %s on node %s"
6914 " (is_primary=False, pass=1): %s",
6915 inst_disk.iv_name, node, msg)
6916 if not (ignore_secondaries or is_offline_secondary):
6919 # FIXME: race condition on drbd migration to primary
6921 # 2nd pass, do only the primary node
6922 for idx, inst_disk in enumerate(disks):
6925 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6926 if node != instance.primary_node:
6929 node_disk = node_disk.Copy()
6930 node_disk.UnsetSize()
6931 lu.cfg.SetDiskID(node_disk, node)
6932 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6934 msg = result.fail_msg
6936 lu.LogWarning("Could not prepare block device %s on node %s"
6937 " (is_primary=True, pass=2): %s",
6938 inst_disk.iv_name, node, msg)
6941 dev_path = result.payload
6943 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6945 # leave the disks configured for the primary node
6946 # this is a workaround that would be fixed better by
6947 # improving the logical/physical id handling
6949 lu.cfg.SetDiskID(disk, instance.primary_node)
6951 return disks_ok, device_info
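# For illustration only (node and device names are made up): callers unpack
# the (disks_ok, device_info) return value and treat a false first element as
# a fatal assembly error, e.g.:
#
#   >>> disks_ok, device_info = True, [("node1", "disk/0", "/dev/drbd0")]
#   >>> disks_ok
#   True
#   >>> [dev for (_, _, dev) in device_info]
#   ['/dev/drbd0']
#
# device_info only describes the primary node, since only the device paths
# visible there are reported back (see the 2nd pass above).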
6954 def _StartInstanceDisks(lu, instance, force):
6955 """Start the disks of an instance.
6958 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6959 ignore_secondaries=force)
6961 _ShutdownInstanceDisks(lu, instance)
6962 if force is not None and not force:
6964 hint=("If the message above refers to a secondary node,"
6965 " you can retry the operation using '--force'"))
6966 raise errors.OpExecError("Disk consistency error")
6969 class LUInstanceDeactivateDisks(NoHooksLU):
6970 """Shutdown an instance's disks.
6975 def ExpandNames(self):
6976 self._ExpandAndLockInstance()
6977 self.needed_locks[locking.LEVEL_NODE] = []
6978 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6980 def DeclareLocks(self, level):
6981 if level == locking.LEVEL_NODE:
6982 self._LockInstancesNodes()
6984 def CheckPrereq(self):
6985 """Check prerequisites.
6987 This checks that the instance is in the cluster.
6990 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6991 assert self.instance is not None, \
6992 "Cannot retrieve locked instance %s" % self.op.instance_name
6994 def Exec(self, feedback_fn):
6995 """Deactivate the disks
6998 instance = self.instance
7000 _ShutdownInstanceDisks(self, instance)
7002 _SafeShutdownInstanceDisks(self, instance)
7005 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7006 """Shutdown block devices of an instance.
7008 This function checks if an instance is running, before calling
7009 _ShutdownInstanceDisks.
7012 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7013 _ShutdownInstanceDisks(lu, instance, disks=disks)
7016 def _ExpandCheckDisks(instance, disks):
7017 """Return the instance disks selected by the disks list
7019 @type disks: list of L{objects.Disk} or None
7020 @param disks: selected disks
7021 @rtype: list of L{objects.Disk}
7022 @return: selected instance disks to act on
7026 return instance.disks
7028 if not set(disks).issubset(instance.disks):
7029 raise errors.ProgrammerError("Can only act on disks belonging to the"
7034 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7035 """Shutdown block devices of an instance.
7037 This does the shutdown on all nodes of the instance.
7039 If ignore_primary is false, errors on the primary node are ignored.
7044 disks = _ExpandCheckDisks(instance, disks)
7047 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7048 lu.cfg.SetDiskID(top_disk, node)
7049 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7050 msg = result.fail_msg
7052 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7053 disk.iv_name, node, msg)
7054 if ((node == instance.primary_node and not ignore_primary) or
7055 (node != instance.primary_node and not result.offline)):
7060 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7061 """Checks if a node has enough free memory.
7063 This function checks if a given node has the needed amount of free
7064 memory. In case the node has less memory or we cannot get the
7065 information from the node, this function raises an OpPrereqError exception.
7068 @type lu: C{LogicalUnit}
7069 @param lu: a logical unit from which we get configuration data
7071 @param node: the node to check
7072 @type reason: C{str}
7073 @param reason: string to use in the error message
7074 @type requested: C{int}
7075 @param requested: the amount of memory in MiB to check for
7076 @type hypervisor_name: C{str}
7077 @param hypervisor_name: the hypervisor to ask for memory stats
7079 @return: node current free memory
7080 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7081 we cannot check the node
7084 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7085 nodeinfo[node].Raise("Can't get data from node %s" % node,
7086 prereq=True, ecode=errors.ECODE_ENVIRON)
7087 (_, _, (hv_info, )) = nodeinfo[node].payload
7089 free_mem = hv_info.get("memory_free", None)
7090 if not isinstance(free_mem, int):
7091 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7092 " was '%s'" % (node, free_mem),
7093 errors.ECODE_ENVIRON)
7094 if requested > free_mem:
7095 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7096 " needed %s MiB, available %s MiB" %
7097 (node, reason, requested, free_mem),
7102 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7103 """Checks if nodes have enough free disk space in all the VGs.
7105 This function checks if all given nodes have the needed amount of
7106 free disk. In case any node has less disk or we cannot get the
7107 information from the node, this function raises an OpPrereqError exception.
7110 @type lu: C{LogicalUnit}
7111 @param lu: a logical unit from which we get configuration data
7112 @type nodenames: C{list}
7113 @param nodenames: the list of node names to check
7114 @type req_sizes: C{dict}
7115 @param req_sizes: the hash of vg and corresponding amount of disk in
7117 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7118 or we cannot check the node
7121 for vg, req_size in req_sizes.items():
7122 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
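# For illustration only (VG names and sizes are made up): req_sizes maps each
# volume group name to the total space in MiB about to be allocated on it, so
# a caller typically aggregates the new disks per VG first:
#
#   >>> disks = [("xenvg", 1024), ("xenvg", 512), ("fastvg", 256)]
#   >>> req_sizes = {}
#   >>> for (vg, size) in disks:
#   ...   req_sizes[vg] = req_sizes.get(vg, 0) + size
#   >>> req_sizes == {"xenvg": 1536, "fastvg": 256}
#   True
#
# _CheckNodesFreeDiskPerVG then verifies each VG on every node separately.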
7125 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7126 """Checks if nodes have enough free disk space in the specified VG.
7128 This function checks if all given nodes have the needed amount of
7129 free disk. In case any node has less disk or we cannot get the
7130 information from the node, this function raises an OpPrereqError exception.
7133 @type lu: C{LogicalUnit}
7134 @param lu: a logical unit from which we get configuration data
7135 @type nodenames: C{list}
7136 @param nodenames: the list of node names to check
7138 @param vg: the volume group to check
7139 @type requested: C{int}
7140 @param requested: the amount of disk in MiB to check for
7141 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7142 or we cannot check the node
7145 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7146 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7147 for node in nodenames:
7148 info = nodeinfo[node]
7149 info.Raise("Cannot get current information from node %s" % node,
7150 prereq=True, ecode=errors.ECODE_ENVIRON)
7151 (_, (vg_info, ), _) = info.payload
7152 vg_free = vg_info.get("vg_free", None)
7153 if not isinstance(vg_free, int):
7154 raise errors.OpPrereqError("Can't compute free disk space on node"
7155 " %s for vg %s, result was '%s'" %
7156 (node, vg, vg_free), errors.ECODE_ENVIRON)
7157 if requested > vg_free:
7158 raise errors.OpPrereqError("Not enough disk space on target node %s"
7159 " vg %s: required %d MiB, available %d MiB" %
7160 (node, vg, requested, vg_free),
7164 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7165 """Checks if nodes have enough physical CPUs
7167 This function checks if all given nodes have the needed number of
7168 physical CPUs. In case any node has less CPUs or we cannot get the
7169 information from the node, this function raises an OpPrereqError exception.
7172 @type lu: C{LogicalUnit}
7173 @param lu: a logical unit from which we get configuration data
7174 @type nodenames: C{list}
7175 @param nodenames: the list of node names to check
7176 @type requested: C{int}
7177 @param requested: the minimum acceptable number of physical CPUs
7178 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7179 or we cannot check the node
7182 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7183 for node in nodenames:
7184 info = nodeinfo[node]
7185 info.Raise("Cannot get current information from node %s" % node,
7186 prereq=True, ecode=errors.ECODE_ENVIRON)
7187 (_, _, (hv_info, )) = info.payload
7188 num_cpus = hv_info.get("cpu_total", None)
7189 if not isinstance(num_cpus, int):
7190 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7191 " on node %s, result was '%s'" %
7192 (node, num_cpus), errors.ECODE_ENVIRON)
7193 if requested > num_cpus:
7194 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7195 "required" % (node, num_cpus, requested),
7199 class LUInstanceStartup(LogicalUnit):
7200 """Starts an instance.
7203 HPATH = "instance-start"
7204 HTYPE = constants.HTYPE_INSTANCE
7207 def CheckArguments(self):
7209 if self.op.beparams:
7210 # fill the beparams dict
7211 objects.UpgradeBeParams(self.op.beparams)
7212 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7214 def ExpandNames(self):
7215 self._ExpandAndLockInstance()
7216 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7218 def DeclareLocks(self, level):
7219 if level == locking.LEVEL_NODE_RES:
7220 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7222 def BuildHooksEnv(self):
7225 This runs on master, primary and secondary nodes of the instance.
7229 "FORCE": self.op.force,
7232 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7236 def BuildHooksNodes(self):
7237 """Build hooks nodes.
7240 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7243 def CheckPrereq(self):
7244 """Check prerequisites.
7246 This checks that the instance is in the cluster.
7249 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7250 assert self.instance is not None, \
7251 "Cannot retrieve locked instance %s" % self.op.instance_name
7254 if self.op.hvparams:
7255 # check hypervisor parameter syntax (locally)
7256 cluster = self.cfg.GetClusterInfo()
7257 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7258 filled_hvp = cluster.FillHV(instance)
7259 filled_hvp.update(self.op.hvparams)
7260 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7261 hv_type.CheckParameterSyntax(filled_hvp)
7262 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7264 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7266 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7268 if self.primary_offline and self.op.ignore_offline_nodes:
7269 self.LogWarning("Ignoring offline primary node")
7271 if self.op.hvparams or self.op.beparams:
7272 self.LogWarning("Overridden parameters are ignored")
7274 _CheckNodeOnline(self, instance.primary_node)
7276 bep = self.cfg.GetClusterInfo().FillBE(instance)
7277 bep.update(self.op.beparams)
7279 # check bridges existence
7280 _CheckInstanceBridgesExist(self, instance)
7282 remote_info = self.rpc.call_instance_info(instance.primary_node,
7284 instance.hypervisor)
7285 remote_info.Raise("Error checking node %s" % instance.primary_node,
7286 prereq=True, ecode=errors.ECODE_ENVIRON)
7287 if not remote_info.payload: # not running already
7288 _CheckNodeFreeMemory(self, instance.primary_node,
7289 "starting instance %s" % instance.name,
7290 bep[constants.BE_MINMEM], instance.hypervisor)
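# For illustration only (keys are simplified stand-ins for the constants.BE_*
# names): the parameter handling in CheckPrereq above layers the per-opcode
# overrides on top of the cluster defaults before validating, which is a
# plain dict update:
#
#   >>> cluster_be = {"minmem": 128, "maxmem": 256}
#   >>> op_be = {"maxmem": 512}
#   >>> bep = dict(cluster_be)
#   >>> bep.update(op_be)
#   >>> bep == {"minmem": 128, "maxmem": 512}
#   True
#
# The real merge is done by cluster.FillBE/FillHV plus the update() calls
# above.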
7292 def Exec(self, feedback_fn):
7293 """Start the instance.
7296 instance = self.instance
7297 force = self.op.force
7299 if not self.op.no_remember:
7300 self.cfg.MarkInstanceUp(instance.name)
7302 if self.primary_offline:
7303 assert self.op.ignore_offline_nodes
7304 self.LogInfo("Primary node offline, marked instance as started")
7306 node_current = instance.primary_node
7308 _StartInstanceDisks(self, instance, force)
7311 self.rpc.call_instance_start(node_current,
7312 (instance, self.op.hvparams,
7314 self.op.startup_paused)
7315 msg = result.fail_msg
7317 _ShutdownInstanceDisks(self, instance)
7318 raise errors.OpExecError("Could not start instance: %s" % msg)
7321 class LUInstanceReboot(LogicalUnit):
7322 """Reboot an instance.
7325 HPATH = "instance-reboot"
7326 HTYPE = constants.HTYPE_INSTANCE
7329 def ExpandNames(self):
7330 self._ExpandAndLockInstance()
7332 def BuildHooksEnv(self):
7335 This runs on master, primary and secondary nodes of the instance.
7339 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7340 "REBOOT_TYPE": self.op.reboot_type,
7341 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7344 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7348 def BuildHooksNodes(self):
7349 """Build hooks nodes.
7352 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7355 def CheckPrereq(self):
7356 """Check prerequisites.
7358 This checks that the instance is in the cluster.
7361 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7362 assert self.instance is not None, \
7363 "Cannot retrieve locked instance %s" % self.op.instance_name
7364 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7365 _CheckNodeOnline(self, instance.primary_node)
7367 # check bridges existence
7368 _CheckInstanceBridgesExist(self, instance)
7370 def Exec(self, feedback_fn):
7371 """Reboot the instance.
7374 instance = self.instance
7375 ignore_secondaries = self.op.ignore_secondaries
7376 reboot_type = self.op.reboot_type
7378 remote_info = self.rpc.call_instance_info(instance.primary_node,
7380 instance.hypervisor)
7381 remote_info.Raise("Error checking node %s" % instance.primary_node)
7382 instance_running = bool(remote_info.payload)
7384 node_current = instance.primary_node
7386 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7387 constants.INSTANCE_REBOOT_HARD]:
7388 for disk in instance.disks:
7389 self.cfg.SetDiskID(disk, node_current)
7390 result = self.rpc.call_instance_reboot(node_current, instance,
7392 self.op.shutdown_timeout)
7393 result.Raise("Could not reboot instance")
7395 if instance_running:
7396 result = self.rpc.call_instance_shutdown(node_current, instance,
7397 self.op.shutdown_timeout)
7398 result.Raise("Could not shutdown instance for full reboot")
7399 _ShutdownInstanceDisks(self, instance)
7401 self.LogInfo("Instance %s was already stopped, starting now",
7403 _StartInstanceDisks(self, instance, ignore_secondaries)
7404 result = self.rpc.call_instance_start(node_current,
7405 (instance, None, None), False)
7406 msg = result.fail_msg
7408 _ShutdownInstanceDisks(self, instance)
7409 raise errors.OpExecError("Could not start instance for"
7410 " full reboot: %s" % msg)
7412 self.cfg.MarkInstanceUp(instance.name)
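# For illustration only (the strings stand in for the
# constants.INSTANCE_REBOOT_* values): the hypervisor-level reboot is only
# used when the instance is running and a soft/hard reboot was requested;
# every other case falls back to shutdown followed by start:
#
#   >>> def use_hv_reboot(running, reboot_type):
#   ...   return running and reboot_type in ("soft", "hard")
#   >>> (use_hv_reboot(True, "hard"), use_hv_reboot(True, "full"),
#   ...  use_hv_reboot(False, "hard"))
#   (True, False, False)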
7415 class LUInstanceShutdown(LogicalUnit):
7416 """Shutdown an instance.
7419 HPATH = "instance-stop"
7420 HTYPE = constants.HTYPE_INSTANCE
7423 def ExpandNames(self):
7424 self._ExpandAndLockInstance()
7426 def BuildHooksEnv(self):
7429 This runs on master, primary and secondary nodes of the instance.
7432 env = _BuildInstanceHookEnvByObject(self, self.instance)
7433 env["TIMEOUT"] = self.op.timeout
7436 def BuildHooksNodes(self):
7437 """Build hooks nodes.
7440 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7443 def CheckPrereq(self):
7444 """Check prerequisites.
7446 This checks that the instance is in the cluster.
7449 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7450 assert self.instance is not None, \
7451 "Cannot retrieve locked instance %s" % self.op.instance_name
7453 if not self.op.force:
7454 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7456 self.LogWarning("Ignoring offline instance check")
7458 self.primary_offline = \
7459 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7461 if self.primary_offline and self.op.ignore_offline_nodes:
7462 self.LogWarning("Ignoring offline primary node")
7464 _CheckNodeOnline(self, self.instance.primary_node)
7466 def Exec(self, feedback_fn):
7467 """Shutdown the instance.
7470 instance = self.instance
7471 node_current = instance.primary_node
7472 timeout = self.op.timeout
7474 # If the instance is offline we shouldn't mark it as down, as that
7475 # resets the offline flag.
7476 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7477 self.cfg.MarkInstanceDown(instance.name)
7479 if self.primary_offline:
7480 assert self.op.ignore_offline_nodes
7481 self.LogInfo("Primary node offline, marked instance as stopped")
7482     else:
7483       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7484       msg = result.fail_msg
7485       if msg:
7486         self.LogWarning("Could not shutdown instance: %s", msg)
7488       _ShutdownInstanceDisks(self, instance)
7491 class LUInstanceReinstall(LogicalUnit):
7492 """Reinstall an instance.
7495 HPATH = "instance-reinstall"
7496 HTYPE = constants.HTYPE_INSTANCE
7499 def ExpandNames(self):
7500 self._ExpandAndLockInstance()
7502 def BuildHooksEnv(self):
7505 This runs on master, primary and secondary nodes of the instance.
7508 return _BuildInstanceHookEnvByObject(self, self.instance)
7510 def BuildHooksNodes(self):
7511 """Build hooks nodes.
7513     """
7514     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7515     return (nl, nl)
7517 def CheckPrereq(self):
7518 """Check prerequisites.
7520 This checks that the instance is in the cluster and is not running.
7523 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7524 assert instance is not None, \
7525 "Cannot retrieve locked instance %s" % self.op.instance_name
7526 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7527 " offline, cannot reinstall")
7529 if instance.disk_template == constants.DT_DISKLESS:
7530 raise errors.OpPrereqError("Instance '%s' has no disks" %
7531                                  self.op.instance_name,
7532                                  errors.ECODE_INVAL)
7533 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7535 if self.op.os_type is not None:
7537 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7538 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7539       instance_os = self.op.os_type
7540     else:
7541       instance_os = instance.os
7543 nodelist = list(instance.all_nodes)
7545 if self.op.osparams:
7546 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7547 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7548       self.os_inst = i_osdict # the new dict (without defaults)
7549     else:
7550       self.os_inst = {}
7552 self.instance = instance
7554 def Exec(self, feedback_fn):
7555 """Reinstall the instance.
7558 inst = self.instance
7560 if self.op.os_type is not None:
7561 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7562 inst.os = self.op.os_type
7563 # Write to configuration
7564 self.cfg.Update(inst, feedback_fn)
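    # The instance is known to be down at this point (CheckPrereq enforces
    # INSTANCE_DOWN), so its disks are activated only for the duration of the
    # OS create scripts and are always deactivated again afterwards.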
7566 _StartInstanceDisks(self, inst, None)
7567     try:
7568       feedback_fn("Running the instance OS create scripts...")
7569 # FIXME: pass debug option from opcode to backend
7570 result = self.rpc.call_instance_os_add(inst.primary_node,
7571 (inst, self.os_inst), True,
7572 self.op.debug_level)
7573 result.Raise("Could not install OS for instance %s on node %s" %
7574 (inst.name, inst.primary_node))
7575     finally:
7576       _ShutdownInstanceDisks(self, inst)
7579 class LUInstanceRecreateDisks(LogicalUnit):
7580 """Recreate an instance's missing disks.
7583 HPATH = "instance-recreate-disks"
7584 HTYPE = constants.HTYPE_INSTANCE
7587 _MODIFYABLE = compat.UniqueFrozenset([
7588 constants.IDISK_SIZE,
7589     constants.IDISK_MODE,
7590     ])
7592 # New or changed disk parameters may have different semantics
7593 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7594 constants.IDISK_ADOPT,
7596 # TODO: Implement support changing VG while recreating
7597     constants.IDISK_VG,
7598     constants.IDISK_METAVG,
7599     constants.IDISK_PROVIDER,
7600     ]))
7602 def _RunAllocator(self):
7603 """Run the allocator based on input opcode.
7606 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7609 # The allocator should actually run in "relocate" mode, but current
7610 # allocators don't support relocating all the nodes of an instance at
7611 # the same time. As a workaround we use "allocate" mode, but this is
7612 # suboptimal for two reasons:
7613 # - The instance name passed to the allocator is present in the list of
7614 # existing instances, so there could be a conflict within the
7615 # internal structures of the allocator. This doesn't happen with the
7616 # current allocators, but it's a liability.
7617 # - The allocator counts the resources used by the instance twice: once
7618 # because the instance exists already, and once because it tries to
7619 # allocate a new instance.
7620 # The allocator could choose some of the nodes on which the instance is
7621 # running, but that's not a problem. If the instance nodes are broken,
7622     # they should already be marked as drained or offline, and hence
7623 # skipped by the allocator. If instance disks have been lost for other
7624 # reasons, then recreating the disks on the same nodes should be fine.
7625 disk_template = self.instance.disk_template
7626 spindle_use = be_full[constants.BE_SPINDLE_USE]
7627 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7628 disk_template=disk_template,
7629 tags=list(self.instance.GetTags()),
7630 os=self.instance.os,
7631                                         nics=[{}],
7632                                         vcpus=be_full[constants.BE_VCPUS],
7633 memory=be_full[constants.BE_MAXMEM],
7634 spindle_use=spindle_use,
7635 disks=[{constants.IDISK_SIZE: d.size,
7636 constants.IDISK_MODE: d.mode}
7637 for d in self.instance.disks],
7638 hypervisor=self.instance.hypervisor,
7639 node_whitelist=None)
7640 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7642 ial.Run(self.op.iallocator)
7644 assert req.RequiredNodes() == len(self.instance.all_nodes)
7646     if not ial.success:
7647       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7648                                  " %s" % (self.op.iallocator, ial.info),
7649                                  errors.ECODE_NORES)
7651     self.op.nodes = ial.result
7652 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7653 self.op.instance_name, self.op.iallocator,
7654 utils.CommaJoin(ial.result))
7656 def CheckArguments(self):
7657 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7658 # Normalize and convert deprecated list of disk indices
7659 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
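      # e.g. a deprecated "[2, 0]" becomes "[(0, {}), (2, {})]"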
7661 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7662     if duplicates:
7663       raise errors.OpPrereqError("Some disks have been specified more than"
7664                                  " once: %s" % utils.CommaJoin(duplicates),
7665                                  errors.ECODE_INVAL)
7667 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7668 # when neither iallocator nor nodes are specified
7669 if self.op.iallocator or self.op.nodes:
7670 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7672 for (idx, params) in self.op.disks:
7673 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7674 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7675       if unsupported:
7676         raise errors.OpPrereqError("Parameters for disk %s try to change"
7677                                    " unmodifiable parameter(s): %s" %
7678                                    (idx, utils.CommaJoin(unsupported)),
7679                                    errors.ECODE_INVAL)
7681 def ExpandNames(self):
7682 self._ExpandAndLockInstance()
7683 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7685     if self.op.nodes:
7686       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7687       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7688     else:
7689       self.needed_locks[locking.LEVEL_NODE] = []
7690 if self.op.iallocator:
7691 # iallocator will select a new node in the same group
7692 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7693 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7695 self.needed_locks[locking.LEVEL_NODE_RES] = []
7697 def DeclareLocks(self, level):
7698 if level == locking.LEVEL_NODEGROUP:
7699 assert self.op.iallocator is not None
7700 assert not self.op.nodes
7701 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7702 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7703 # Lock the primary group used by the instance optimistically; this
7704 # requires going via the node before it's locked, requiring
7705 # verification later on
7706 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7707 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
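      # The group membership is re-checked in CheckPrereq once the node locks
      # are actually held (see _CheckInstanceNodeGroups there).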
7709 elif level == locking.LEVEL_NODE:
7710 # If an allocator is used, then we lock all the nodes in the current
7711 # instance group, as we don't know yet which ones will be selected;
7712 # if we replace the nodes without using an allocator, locks are
7713 # already declared in ExpandNames; otherwise, we need to lock all the
7714 # instance nodes for disk re-creation
7715 if self.op.iallocator:
7716 assert not self.op.nodes
7717 assert not self.needed_locks[locking.LEVEL_NODE]
7718 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7720 # Lock member nodes of the group of the primary node
7721 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7722 self.needed_locks[locking.LEVEL_NODE].extend(
7723 self.cfg.GetNodeGroup(group_uuid).members)
7725 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7726 elif not self.op.nodes:
7727 self._LockInstancesNodes(primary_only=False)
7728 elif level == locking.LEVEL_NODE_RES:
7730 self.needed_locks[locking.LEVEL_NODE_RES] = \
7731 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7733 def BuildHooksEnv(self):
7736 This runs on master, primary and secondary nodes of the instance.
7739 return _BuildInstanceHookEnvByObject(self, self.instance)
7741 def BuildHooksNodes(self):
7742 """Build hooks nodes.
7744     """
7745     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7746     return (nl, nl)
7748 def CheckPrereq(self):
7749 """Check prerequisites.
7751 This checks that the instance is in the cluster and is not running.
7754 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7755 assert instance is not None, \
7756 "Cannot retrieve locked instance %s" % self.op.instance_name
7757     if self.op.nodes:
7758       if len(self.op.nodes) != len(instance.all_nodes):
7759         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7760                                    " %d replacement nodes were specified" %
7761                                    (instance.name, len(instance.all_nodes),
7762                                     len(self.op.nodes)),
7763                                    errors.ECODE_INVAL)
7764       assert instance.disk_template != constants.DT_DRBD8 or \
7765              len(self.op.nodes) == 2
7766       assert instance.disk_template != constants.DT_PLAIN or \
7767              len(self.op.nodes) == 1
7768       primary_node = self.op.nodes[0]
7769     else:
7770       primary_node = instance.primary_node
7771 if not self.op.iallocator:
7772 _CheckNodeOnline(self, primary_node)
7774 if instance.disk_template == constants.DT_DISKLESS:
7775 raise errors.OpPrereqError("Instance '%s' has no disks" %
7776 self.op.instance_name, errors.ECODE_INVAL)
7778 # Verify if node group locks are still correct
7779 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7780     if owned_groups:
7781       # Node group locks are acquired only for the primary node (and only
7782       # when the allocator is used)
7783       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7784                                primary_only=True)
7786 # if we replace nodes *and* the old primary is offline, we don't
7787 # check the instance state
7788 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7789 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7790 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7791 msg="cannot recreate disks")
7793     if self.op.disks:
7794       self.disks = dict(self.op.disks)
7795     else:
7796       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7798     maxidx = max(self.disks.keys())
7799     if maxidx >= len(instance.disks):
7800       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7801                                  errors.ECODE_INVAL)
7803 if ((self.op.nodes or self.op.iallocator) and
7804 sorted(self.disks.keys()) != range(len(instance.disks))):
7805 raise errors.OpPrereqError("Can't recreate disks partially and"
7806                                  " change the nodes at the same time",
7807                                  errors.ECODE_INVAL)
7809 self.instance = instance
7811 if self.op.iallocator:
7812 self._RunAllocator()
7813 # Release unneeded node and node resource locks
7814 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7815 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7816 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7818 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7820 def Exec(self, feedback_fn):
7821 """Recreate the disks.
7824 instance = self.instance
7826 assert (self.owned_locks(locking.LEVEL_NODE) ==
7827 self.owned_locks(locking.LEVEL_NODE_RES))
7829     to_skip = []
7830     mods = [] # keeps track of needed changes
7832     for idx, disk in enumerate(instance.disks):
7833       try:
7834         changes = self.disks[idx]
7835       except KeyError:
7836         # Disk should not be recreated
7837         to_skip.append(idx)
7838         continue
7840       # update secondaries for disks, if needed
7841 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7842 # need to update the nodes and minors
7843 assert len(self.op.nodes) == 2
7844 assert len(disk.logical_id) == 6 # otherwise disk internals
7846 (_, _, old_port, _, _, old_secret) = disk.logical_id
7847 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7848 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7849 new_minors[0], new_minors[1], old_secret)
7850         assert len(disk.logical_id) == len(new_id)
7851       else:
7852         new_id = None
7854       mods.append((idx, new_id, changes))
7856 # now that we have passed all asserts above, we can apply the mods
7857 # in a single run (to avoid partial changes)
7858 for idx, new_id, changes in mods:
7859 disk = instance.disks[idx]
7860 if new_id is not None:
7861 assert disk.dev_type == constants.LD_DRBD8
7862 disk.logical_id = new_id
7863       if changes:
7864         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7865                     mode=changes.get(constants.IDISK_MODE, None))
7867 # change primary node, if needed
7868     if self.op.nodes:
7869       instance.primary_node = self.op.nodes[0]
7870       self.LogWarning("Changing the instance's nodes, you will have to"
7871                       " remove any disks left on the older nodes manually")
7873     if self.op.nodes:
7874       self.cfg.Update(instance, feedback_fn)
7876 # All touched nodes must be locked
7877 mylocks = self.owned_locks(locking.LEVEL_NODE)
7878 assert mylocks.issuperset(frozenset(instance.all_nodes))
7879 _CreateDisks(self, instance, to_skip=to_skip)
7882 class LUInstanceRename(LogicalUnit):
7883 """Rename an instance.
7886 HPATH = "instance-rename"
7887 HTYPE = constants.HTYPE_INSTANCE
7889 def CheckArguments(self):
7893 if self.op.ip_check and not self.op.name_check:
7894 # TODO: make the ip check more flexible and not depend on the name check
7895       raise errors.OpPrereqError("IP address check requires a name check",
7896                                  errors.ECODE_INVAL)
7898 def BuildHooksEnv(self):
7901 This runs on master, primary and secondary nodes of the instance.
7904 env = _BuildInstanceHookEnvByObject(self, self.instance)
7905     env["INSTANCE_NEW_NAME"] = self.op.new_name
7906     return env
7908 def BuildHooksNodes(self):
7909 """Build hooks nodes.
7911     """
7912     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7913     return (nl, nl)
7915 def CheckPrereq(self):
7916 """Check prerequisites.
7918 This checks that the instance is in the cluster and is not running.
7921 self.op.instance_name = _ExpandInstanceName(self.cfg,
7922 self.op.instance_name)
7923 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7924 assert instance is not None
7925 _CheckNodeOnline(self, instance.primary_node)
7926 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7927 msg="cannot rename")
7928 self.instance = instance
7930 new_name = self.op.new_name
7931 if self.op.name_check:
7932 hostname = _CheckHostnameSane(self, new_name)
7933 new_name = self.op.new_name = hostname.name
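      # The resolved IP is probed on the node daemon port only to detect an
      # address that is already in use; any listener there triggers the error
      # below.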
7934 if (self.op.ip_check and
7935 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7936 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7937 (hostname.ip, new_name),
7938 errors.ECODE_NOTUNIQUE)
7940 instance_list = self.cfg.GetInstanceList()
7941 if new_name in instance_list and new_name != instance.name:
7942 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7943 new_name, errors.ECODE_EXISTS)
7945 def Exec(self, feedback_fn):
7946 """Rename the instance.
7949 inst = self.instance
7950 old_name = inst.name
7952 rename_file_storage = False
7953 if (inst.disk_template in constants.DTS_FILEBASED and
7954 self.op.new_name != inst.name):
7955 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7956 rename_file_storage = True
7958 self.cfg.RenameInstance(inst.name, self.op.new_name)
7959 # Change the instance lock. This is definitely safe while we hold the BGL.
7960 # Otherwise the new lock would have to be added in acquired mode.
7962 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7963 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7964 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7966 # re-read the instance from the configuration after rename
7967 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7969 if rename_file_storage:
7970 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7971 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7972 old_file_storage_dir,
7973 new_file_storage_dir)
7974 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7975 " (but the instance has been renamed in Ganeti)" %
7976 (inst.primary_node, old_file_storage_dir,
7977 new_file_storage_dir))
7979 _StartInstanceDisks(self, inst, None)
7980 # update info on disks
7981 info = _GetInstanceInfoText(inst)
7982 for (idx, disk) in enumerate(inst.disks):
7983 for node in inst.all_nodes:
7984 self.cfg.SetDiskID(disk, node)
7985 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7986         if result.fail_msg:
7987           self.LogWarning("Error setting info on node %s for disk %s: %s",
7988                           node, idx, result.fail_msg)
7989     try:
7990       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7991                                                  old_name, self.op.debug_level)
7992       msg = result.fail_msg
7993       if msg:
7994         msg = ("Could not run OS rename script for instance %s on node %s"
7995                " (but the instance has been renamed in Ganeti): %s" %
7996                (inst.name, inst.primary_node, msg))
7997         self.LogWarning(msg)
7998     finally:
7999       _ShutdownInstanceDisks(self, inst)
8001     return inst.name
8004 class LUInstanceRemove(LogicalUnit):
8005 """Remove an instance.
8008 HPATH = "instance-remove"
8009 HTYPE = constants.HTYPE_INSTANCE
8012 def ExpandNames(self):
8013 self._ExpandAndLockInstance()
8014 self.needed_locks[locking.LEVEL_NODE] = []
8015 self.needed_locks[locking.LEVEL_NODE_RES] = []
8016 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8018 def DeclareLocks(self, level):
8019 if level == locking.LEVEL_NODE:
8020 self._LockInstancesNodes()
8021 elif level == locking.LEVEL_NODE_RES:
8023 self.needed_locks[locking.LEVEL_NODE_RES] = \
8024 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8026 def BuildHooksEnv(self):
8029 This runs on master, primary and secondary nodes of the instance.
8032 env = _BuildInstanceHookEnvByObject(self, self.instance)
8033     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8034     return env
8036 def BuildHooksNodes(self):
8037 """Build hooks nodes.
8040 nl = [self.cfg.GetMasterNode()]
8041 nl_post = list(self.instance.all_nodes) + nl
8042 return (nl, nl_post)
8044 def CheckPrereq(self):
8045 """Check prerequisites.
8047 This checks that the instance is in the cluster.
8050 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8051 assert self.instance is not None, \
8052 "Cannot retrieve locked instance %s" % self.op.instance_name
8054 def Exec(self, feedback_fn):
8055 """Remove the instance.
8058 instance = self.instance
8059 logging.info("Shutting down instance %s on node %s",
8060 instance.name, instance.primary_node)
8062 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8063 self.op.shutdown_timeout)
8064     msg = result.fail_msg
8065     if msg:
8066       if self.op.ignore_failures:
8067         feedback_fn("Warning: can't shutdown instance: %s" % msg)
8068       else:
8069         raise errors.OpExecError("Could not shutdown instance %s on"
8070                                  " node %s: %s" %
8071                                  (instance.name, instance.primary_node, msg))
8073 assert (self.owned_locks(locking.LEVEL_NODE) ==
8074 self.owned_locks(locking.LEVEL_NODE_RES))
8075 assert not (set(instance.all_nodes) -
8076 self.owned_locks(locking.LEVEL_NODE)), \
8077 "Not owning correct locks"
8079 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8082 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8083 """Utility function to remove an instance.
8086 logging.info("Removing block devices for instance %s", instance.name)
8088 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8089 if not ignore_failures:
8090 raise errors.OpExecError("Can't remove instance's disks")
8091 feedback_fn("Warning: can't remove instance's disks")
8093 logging.info("Removing instance %s out of cluster config", instance.name)
8095 lu.cfg.RemoveInstance(instance.name)
8097 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8098 "Instance lock removal conflict"
8100 # Remove lock for the instance
8101 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8104 class LUInstanceQuery(NoHooksLU):
8105 """Logical unit for querying instances.
8108 # pylint: disable=W0142
8111 def CheckArguments(self):
8112 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8113 self.op.output_fields, self.op.use_locking)
8115 def ExpandNames(self):
8116 self.iq.ExpandNames(self)
8118 def DeclareLocks(self, level):
8119 self.iq.DeclareLocks(self, level)
8121 def Exec(self, feedback_fn):
8122 return self.iq.OldStyleQuery(self)
8125 def _ExpandNamesForMigration(lu):
8126 """Expands names for use with L{TLMigrateInstance}.
8128 @type lu: L{LogicalUnit}
8131 if lu.op.target_node is not None:
8132 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8134 lu.needed_locks[locking.LEVEL_NODE] = []
8135 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8137 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8138 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8140 # The node allocation lock is actually only needed for replicated instances
8141 # (e.g. DRBD8) and if an iallocator is used.
8142 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8145 def _DeclareLocksForMigration(lu, level):
8146 """Declares locks for L{TLMigrateInstance}.
8148 @type lu: L{LogicalUnit}
8149 @param level: Lock level
8152 if level == locking.LEVEL_NODE_ALLOC:
8153 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8155 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8157 # Node locks are already declared here rather than at LEVEL_NODE as we need
8158 # the instance object anyway to declare the node allocation lock.
8159 if instance.disk_template in constants.DTS_EXT_MIRROR:
8160 if lu.op.target_node is None:
8161 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8162 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8163       else:
8164         lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8165                                                lu.op.target_node]
8166       del lu.recalculate_locks[locking.LEVEL_NODE]
8167     else:
8168       lu._LockInstancesNodes()  # pylint: disable=W0212
8170 elif level == locking.LEVEL_NODE:
8171 # Node locks are declared together with the node allocation lock
8172 assert (lu.needed_locks[locking.LEVEL_NODE] or
8173 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8175 elif level == locking.LEVEL_NODE_RES:
8177 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8178 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8181 class LUInstanceFailover(LogicalUnit):
8182 """Failover an instance.
8185 HPATH = "instance-failover"
8186 HTYPE = constants.HTYPE_INSTANCE
8189 def CheckArguments(self):
8190 """Check the arguments.
8193 self.iallocator = getattr(self.op, "iallocator", None)
8194 self.target_node = getattr(self.op, "target_node", None)
8196 def ExpandNames(self):
8197 self._ExpandAndLockInstance()
8198 _ExpandNamesForMigration(self)
8200     self._migrater = \
8201       TLMigrateInstance(self, self.op.instance_name, False, True, False,
8202 self.op.ignore_consistency, True,
8203 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8205 self.tasklets = [self._migrater]
8207 def DeclareLocks(self, level):
8208 _DeclareLocksForMigration(self, level)
8210 def BuildHooksEnv(self):
8213 This runs on master, primary and secondary nodes of the instance.
8216 instance = self._migrater.instance
8217 source_node = instance.primary_node
8218 target_node = self.op.target_node
8219     env = {
8220       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8221       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8222       "OLD_PRIMARY": source_node,
8223       "NEW_PRIMARY": target_node,
8224       }
8226     if instance.disk_template in constants.DTS_INT_MIRROR:
8227       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8228       env["NEW_SECONDARY"] = source_node
8229     else:
8230       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8232     env.update(_BuildInstanceHookEnvByObject(self, instance))
8234     return env
8236 def BuildHooksNodes(self):
8237 """Build hooks nodes.
8240 instance = self._migrater.instance
8241 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8242 return (nl, nl + [instance.primary_node])
8245 class LUInstanceMigrate(LogicalUnit):
8246 """Migrate an instance.
8248 This is migration without shutting down, compared to the failover,
8249 which is done with shutdown.
8252 HPATH = "instance-migrate"
8253 HTYPE = constants.HTYPE_INSTANCE
8256 def ExpandNames(self):
8257 self._ExpandAndLockInstance()
8258 _ExpandNamesForMigration(self)
8260     self._migrater = \
8261       TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8262 False, self.op.allow_failover, False,
8263 self.op.allow_runtime_changes,
8264 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8265 self.op.ignore_ipolicy)
8267 self.tasklets = [self._migrater]
8269 def DeclareLocks(self, level):
8270 _DeclareLocksForMigration(self, level)
8272 def BuildHooksEnv(self):
8275 This runs on master, primary and secondary nodes of the instance.
8278 instance = self._migrater.instance
8279 source_node = instance.primary_node
8280 target_node = self.op.target_node
8281 env = _BuildInstanceHookEnvByObject(self, instance)
8282     env.update({
8283       "MIGRATE_LIVE": self._migrater.live,
8284       "MIGRATE_CLEANUP": self.op.cleanup,
8285       "OLD_PRIMARY": source_node,
8286       "NEW_PRIMARY": target_node,
8287       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8288       })
8290     if instance.disk_template in constants.DTS_INT_MIRROR:
8291       env["OLD_SECONDARY"] = target_node
8292       env["NEW_SECONDARY"] = source_node
8293     else:
8294       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8296     return env
8298 def BuildHooksNodes(self):
8299 """Build hooks nodes.
8302 instance = self._migrater.instance
8303 snodes = list(instance.secondary_nodes)
8304     nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8305     return (nl, nl)
8308 class LUInstanceMove(LogicalUnit):
8309   """Move an instance by data-copying.
8311   """
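  # The move works by exporting every block device from the source node to the
  # target node (see Exec), which is why CheckPrereq only accepts plain LVM-
  # and file-based disks.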
8312 HPATH = "instance-move"
8313 HTYPE = constants.HTYPE_INSTANCE
8316 def ExpandNames(self):
8317 self._ExpandAndLockInstance()
8318 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8319 self.op.target_node = target_node
8320 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8321 self.needed_locks[locking.LEVEL_NODE_RES] = []
8322 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8324 def DeclareLocks(self, level):
8325 if level == locking.LEVEL_NODE:
8326 self._LockInstancesNodes(primary_only=True)
8327 elif level == locking.LEVEL_NODE_RES:
8329 self.needed_locks[locking.LEVEL_NODE_RES] = \
8330 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8332 def BuildHooksEnv(self):
8335 This runs on master, primary and secondary nodes of the instance.
8338     env = {
8339       "TARGET_NODE": self.op.target_node,
8340       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8341       }
8342     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8343     return env
8345 def BuildHooksNodes(self):
8346 """Build hooks nodes.
8348     """
8349     nl = [
8350       self.cfg.GetMasterNode(),
8351       self.instance.primary_node,
8352       self.op.target_node,
8353       ]
8354     return (nl, nl)
8356 def CheckPrereq(self):
8357 """Check prerequisites.
8359 This checks that the instance is in the cluster.
8362 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8363 assert self.instance is not None, \
8364 "Cannot retrieve locked instance %s" % self.op.instance_name
8366 node = self.cfg.GetNodeInfo(self.op.target_node)
8367 assert node is not None, \
8368 "Cannot retrieve locked node %s" % self.op.target_node
8370 self.target_node = target_node = node.name
8372 if target_node == instance.primary_node:
8373 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8374                                  (instance.name, target_node),
8375                                  errors.ECODE_STATE)
8377 bep = self.cfg.GetClusterInfo().FillBE(instance)
8379 for idx, dsk in enumerate(instance.disks):
8380 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8381 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8382 " cannot copy" % idx, errors.ECODE_STATE)
8384 _CheckNodeOnline(self, target_node)
8385 _CheckNodeNotDrained(self, target_node)
8386 _CheckNodeVmCapable(self, target_node)
8387 cluster = self.cfg.GetClusterInfo()
8388 group_info = self.cfg.GetNodeGroup(node.group)
8389 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8390 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8391 ignore=self.op.ignore_ipolicy)
8393 if instance.admin_state == constants.ADMINST_UP:
8394 # check memory requirements on the secondary node
8395 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8396 instance.name, bep[constants.BE_MAXMEM],
8397 instance.hypervisor)
8398     else:
8399       self.LogInfo("Not checking memory on the secondary node as"
8400                    " instance will not be started")
8402     # check bridge existence
8403 _CheckInstanceBridgesExist(self, instance, node=target_node)
8405 def Exec(self, feedback_fn):
8406 """Move an instance.
8408 The move is done by shutting it down on its present node, copying
8409 the data over (slow) and starting it on the new node.
8412 instance = self.instance
8414 source_node = instance.primary_node
8415 target_node = self.target_node
8417 self.LogInfo("Shutting down instance %s on source node %s",
8418 instance.name, source_node)
8420 assert (self.owned_locks(locking.LEVEL_NODE) ==
8421 self.owned_locks(locking.LEVEL_NODE_RES))
8423 result = self.rpc.call_instance_shutdown(source_node, instance,
8424 self.op.shutdown_timeout)
8425     msg = result.fail_msg
8426     if msg:
8427       if self.op.ignore_consistency:
8428         self.LogWarning("Could not shutdown instance %s on node %s."
8429                         " Proceeding anyway. Please make sure node"
8430                         " %s is down. Error details: %s",
8431                         instance.name, source_node, source_node, msg)
8432       else:
8433         raise errors.OpExecError("Could not shutdown instance %s on"
8434                                  " node %s: %s" %
8435                                  (instance.name, source_node, msg))
8437 # create the target disks
8438     try:
8439       _CreateDisks(self, instance, target_node=target_node)
8440     except errors.OpExecError:
8441       self.LogWarning("Device creation failed, reverting...")
8442       try:
8443         _RemoveDisks(self, instance, target_node=target_node)
8444       finally:
8445         self.cfg.ReleaseDRBDMinors(instance.name)
8446         raise
8448 cluster_name = self.cfg.GetClusterInfo().cluster_name
8450     errs = []
8451     # activate, get path, copy the data over
8452 for idx, disk in enumerate(instance.disks):
8453 self.LogInfo("Copying data for disk %d", idx)
8454 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8455 instance.name, True, idx)
8456       if result.fail_msg:
8457         self.LogWarning("Can't assemble newly created disk %d: %s",
8458                         idx, result.fail_msg)
8459         errs.append(result.fail_msg)
8460         break
8461       dev_path = result.payload
8462       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8463                                              target_node, dev_path,
8464                                              cluster_name)
8465       if result.fail_msg:
8466         self.LogWarning("Can't copy data over for disk %d: %s",
8467                         idx, result.fail_msg)
8468         errs.append(result.fail_msg)
8469         break
8471     if errs:
8472       self.LogWarning("Some disks failed to copy, aborting")
8473       try:
8474         _RemoveDisks(self, instance, target_node=target_node)
8475       finally:
8476         self.cfg.ReleaseDRBDMinors(instance.name)
8477         raise errors.OpExecError("Errors during disk copy: %s" %
8478                                  (",".join(errs),))
8480 instance.primary_node = target_node
8481 self.cfg.Update(instance, feedback_fn)
8483 self.LogInfo("Removing the disks on the original node")
8484 _RemoveDisks(self, instance, target_node=source_node)
8486 # Only start the instance if it's marked as up
8487 if instance.admin_state == constants.ADMINST_UP:
8488 self.LogInfo("Starting instance %s on node %s",
8489 instance.name, target_node)
8491       disks_ok, _ = _AssembleInstanceDisks(self, instance,
8492                                            ignore_secondaries=True)
8493       if not disks_ok:
8494         _ShutdownInstanceDisks(self, instance)
8495         raise errors.OpExecError("Can't activate the instance's disks")
8497 result = self.rpc.call_instance_start(target_node,
8498 (instance, None, None), False)
8499       msg = result.fail_msg
8500       if msg:
8501         _ShutdownInstanceDisks(self, instance)
8502 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8503 (instance.name, target_node, msg))
8506 class LUNodeMigrate(LogicalUnit):
8507 """Migrate all instances from a node.
8510 HPATH = "node-migrate"
8511 HTYPE = constants.HTYPE_NODE
8514   def CheckArguments(self):
8515     pass
8517 def ExpandNames(self):
8518 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8520 self.share_locks = _ShareAll()
8521 self.needed_locks = {
8522       locking.LEVEL_NODE: [self.op.node_name],
8523       }
8525 def BuildHooksEnv(self):
8528 This runs on the master, the primary and all the secondaries.
8531     return {
8532       "NODE_NAME": self.op.node_name,
8533       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8534       }
8536 def BuildHooksNodes(self):
8537 """Build hooks nodes.
8539     """
8540     nl = [self.cfg.GetMasterNode()]
8541     return (nl, nl)
8543   def CheckPrereq(self):
8544     pass
8546 def Exec(self, feedback_fn):
8547 # Prepare jobs for migration instances
8548     allow_runtime_changes = self.op.allow_runtime_changes
8549     jobs = [
8550       [opcodes.OpInstanceMigrate(instance_name=inst.name,
8551                                  mode=self.op.mode,
8552                                  live=self.op.live,
8553 iallocator=self.op.iallocator,
8554 target_node=self.op.target_node,
8555 allow_runtime_changes=allow_runtime_changes,
8556 ignore_ipolicy=self.op.ignore_ipolicy)]
8557 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8559 # TODO: Run iallocator in this opcode and pass correct placement options to
8560 # OpInstanceMigrate. Since other jobs can modify the cluster between
8561 # running the iallocator and the actual migration, a good consistency model
8562 # will have to be found.
8564 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8565 frozenset([self.op.node_name]))
8567 return ResultWithJobs(jobs)
8570 class TLMigrateInstance(Tasklet):
8571 """Tasklet class for instance migration.
8574 @ivar live: whether the migration will be done live or non-live;
8575       this variable is initialized only after CheckPrereq has run
8576 @type cleanup: boolean
8577   @ivar cleanup: Whether we are cleaning up after a failed migration
8578 @type iallocator: string
8579 @ivar iallocator: The iallocator used to determine target_node
8580 @type target_node: string
8581 @ivar target_node: If given, the target_node to reallocate the instance to
8582 @type failover: boolean
8583 @ivar failover: Whether operation results in failover or migration
8584 @type fallback: boolean
8585 @ivar fallback: Whether fallback to failover is allowed if migration not
8587 @type ignore_consistency: boolean
8588   @ivar ignore_consistency: Whether we should ignore consistency between source
8589       and target node
8590 @type shutdown_timeout: int
8591 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8592 @type ignore_ipolicy: bool
8593   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8595   """
8598 _MIGRATION_POLL_INTERVAL = 1 # seconds
8599 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
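  # While the memory transfer is running, _ExecMigration polls the hypervisor
  # every _MIGRATION_POLL_INTERVAL seconds and emits a progress message at
  # most every _MIGRATION_FEEDBACK_INTERVAL seconds.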
8601 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8602                ignore_consistency, allow_runtime_changes, shutdown_timeout,
8603                ignore_ipolicy):
8604     """Initializes this class.
8606     """
8607     Tasklet.__init__(self, lu)
8609     # Parameters
8610 self.instance_name = instance_name
8611 self.cleanup = cleanup
8612 self.live = False # will be overridden later
8613 self.failover = failover
8614 self.fallback = fallback
8615 self.ignore_consistency = ignore_consistency
8616 self.shutdown_timeout = shutdown_timeout
8617 self.ignore_ipolicy = ignore_ipolicy
8618 self.allow_runtime_changes = allow_runtime_changes
8620 def CheckPrereq(self):
8621 """Check prerequisites.
8623 This checks that the instance is in the cluster.
8626 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8627 instance = self.cfg.GetInstanceInfo(instance_name)
8628 assert instance is not None
8629 self.instance = instance
8630 cluster = self.cfg.GetClusterInfo()
8632 if (not self.cleanup and
8633 not instance.admin_state == constants.ADMINST_UP and
8634 not self.failover and self.fallback):
8635 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8636 " switching to failover")
8637 self.failover = True
8639     if instance.disk_template not in constants.DTS_MIRRORED:
8640       if self.failover:
8641         text = "failovers"
8642       else:
8643         text = "migrations"
8644       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8645                                  " %s" % (instance.disk_template, text),
8646                                  errors.ECODE_STATE)
8648 if instance.disk_template in constants.DTS_EXT_MIRROR:
8649 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8651 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8653 if self.lu.op.iallocator:
8654 self._RunAllocator()
8655       else:
8656         # We set self.target_node as it is required by
8657         # BuildHooksEnv
8658         self.target_node = self.lu.op.target_node
8660 # Check that the target node is correct in terms of instance policy
8661 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8662 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8663       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8664                                                               group_info)
8665 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8666 ignore=self.ignore_ipolicy)
8668 # self.target_node is already populated, either directly or by the
8670 target_node = self.target_node
8671 if self.target_node == instance.primary_node:
8672 raise errors.OpPrereqError("Cannot migrate instance %s"
8673 " to its primary (%s)" %
8674                                    (instance.name, instance.primary_node),
8675                                    errors.ECODE_STATE)
8677 if len(self.lu.tasklets) == 1:
8678         # It is safe to release locks only when we're the only tasklet
8679         # in this LU
8680         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8681                       keep=[instance.primary_node, self.target_node])
8682         _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8684     else:
8685       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8687 secondary_nodes = instance.secondary_nodes
8688 if not secondary_nodes:
8689 raise errors.ConfigurationError("No secondary node but using"
8690 " %s disk template" %
8691 instance.disk_template)
8692 target_node = secondary_nodes[0]
8693 if self.lu.op.iallocator or (self.lu.op.target_node and
8694 self.lu.op.target_node != target_node):
8695         if self.failover:
8696           text = "failed over"
8697         else:
8698           text = "migrated"
8699 raise errors.OpPrereqError("Instances with disk template %s cannot"
8700 " be %s to arbitrary nodes"
8701 " (neither an iallocator nor a target"
8702 " node can be passed)" %
8703                                    (instance.disk_template, text),
8704                                    errors.ECODE_INVAL)
8705 nodeinfo = self.cfg.GetNodeInfo(target_node)
8706 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8707       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8708                                                               group_info)
8709 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8710 ignore=self.ignore_ipolicy)
8712 i_be = cluster.FillBE(instance)
8714 # check memory requirements on the secondary node
8715 if (not self.cleanup and
8716 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8717 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8718 "migrating instance %s" %
8719                                                instance.name,
8720                                                i_be[constants.BE_MINMEM],
8721                                                instance.hypervisor)
8722     else:
8723       self.lu.LogInfo("Not checking memory on the secondary node as"
8724 " instance will not be started")
8726 # check if failover must be forced instead of migration
8727 if (not self.cleanup and not self.failover and
8728 i_be[constants.BE_ALWAYS_FAILOVER]):
8729       self.lu.LogInfo("Instance configured to always failover; fallback"
8730                       " to failover")
8731 self.failover = True
8733     # check bridge existence
8734 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8736 if not self.cleanup:
8737 _CheckNodeNotDrained(self.lu, target_node)
8738 if not self.failover:
8739         result = self.rpc.call_instance_migratable(instance.primary_node,
8740                                                    instance)
8741         if result.fail_msg and self.fallback:
8742           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8743                           " failover")
8744           self.failover = True
8745         else:
8746           result.Raise("Can't migrate, please use failover",
8747                        prereq=True, ecode=errors.ECODE_STATE)
8749 assert not (self.failover and self.cleanup)
8751 if not self.failover:
8752 if self.lu.op.live is not None and self.lu.op.mode is not None:
8753 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8754                                    " parameters are accepted",
8755                                    errors.ECODE_INVAL)
8756       if self.lu.op.live is not None:
8757         if self.lu.op.live:
8758           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8759         else:
8760           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8761 # reset the 'live' parameter to None so that repeated
8762 # invocations of CheckPrereq do not raise an exception
8763 self.lu.op.live = None
8764 elif self.lu.op.mode is None:
8765 # read the default value from the hypervisor
8766 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8767 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8769 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8770     else:
8771       # Failover is never live
8772       self.live = False
8774 if not (self.failover or self.cleanup):
8775 remote_info = self.rpc.call_instance_info(instance.primary_node,
8776                                                 instance.name,
8777                                                 instance.hypervisor)
8778 remote_info.Raise("Error checking instance on node %s" %
8779 instance.primary_node)
8780 instance_running = bool(remote_info.payload)
8781 if instance_running:
8782 self.current_mem = int(remote_info.payload["memory"])
8784 def _RunAllocator(self):
8785 """Run the allocator based on input opcode.
8788 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8790 # FIXME: add a self.ignore_ipolicy option
8791 req = iallocator.IAReqRelocate(name=self.instance_name,
8792 relocate_from=[self.instance.primary_node])
8793 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8795 ial.Run(self.lu.op.iallocator)
8797     if not ial.success:
8798       raise errors.OpPrereqError("Can't compute nodes using"
8799                                  " iallocator '%s': %s" %
8800                                  (self.lu.op.iallocator, ial.info),
8801                                  errors.ECODE_NORES)
8802 self.target_node = ial.result[0]
8803 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8804 self.instance_name, self.lu.op.iallocator,
8805 utils.CommaJoin(ial.result))
8807 def _WaitUntilSync(self):
8808 """Poll with custom rpc for disk sync.
8810     This uses our own step-based rpc call.
8812     """
8813     self.feedback_fn("* wait until resync is done")
8814     all_done = False
8815     while not all_done:
8816       all_done = True
8817       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8818                                             self.nodes_ip,
8819                                             (self.instance.disks,
8820                                              self.instance))
8821       min_percent = 100
8822       for node, nres in result.items():
8823         nres.Raise("Cannot resync disks on node %s" % node)
8824         node_done, node_percent = nres.payload
8825         all_done = all_done and node_done
8826         if node_percent is not None:
8827           min_percent = min(min_percent, node_percent)
8828       if not all_done:
8829         if min_percent < 100:
8830           self.feedback_fn(" - progress: %.1f%%" % min_percent)
8831         time.sleep(2)
8833 def _EnsureSecondary(self, node):
8834 """Demote a node to secondary.
8837 self.feedback_fn("* switching node %s to secondary mode" % node)
8839 for dev in self.instance.disks:
8840 self.cfg.SetDiskID(dev, node)
8842 result = self.rpc.call_blockdev_close(node, self.instance.name,
8843 self.instance.disks)
8844 result.Raise("Cannot change disk to secondary on node %s" % node)
8846 def _GoStandalone(self):
8847 """Disconnect from the network.
8850 self.feedback_fn("* changing into standalone mode")
8851 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8852 self.instance.disks)
8853 for node, nres in result.items():
8854 nres.Raise("Cannot disconnect disks node %s" % node)
8856 def _GoReconnect(self, multimaster):
8857     """Reconnect to the network.
8859     """
8860     if multimaster:
8861       msg = "dual-master"
8862     else:
8863       msg = "single-master"
8864 self.feedback_fn("* changing disks into %s mode" % msg)
8865 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8866 (self.instance.disks, self.instance),
8867 self.instance.name, multimaster)
8868 for node, nres in result.items():
8869 nres.Raise("Cannot change disks config on node %s" % node)
8871 def _ExecCleanup(self):
8872 """Try to cleanup after a failed migration.
8874 The cleanup is done by:
8875 - check that the instance is running only on one node
8876 (and update the config if needed)
8877 - change disks on its secondary node to secondary
8878 - wait until disks are fully synchronized
8879 - disconnect from the network
8880 - change disks into single-master mode
8881       - wait again until disks are fully synchronized
8883     """
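    # This path is typically reached via "gnt-instance migrate --cleanup"
    # after an earlier migration was interrupted.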
8884 instance = self.instance
8885 target_node = self.target_node
8886 source_node = self.source_node
8888 # check running on only one node
8889 self.feedback_fn("* checking where the instance actually runs"
8890                      " (if this hangs, the hypervisor might be in"
8891                      " a bad state)")
8892 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8893 for node, result in ins_l.items():
8894 result.Raise("Can't contact node %s" % node)
8896 runningon_source = instance.name in ins_l[source_node].payload
8897 runningon_target = instance.name in ins_l[target_node].payload
8899 if runningon_source and runningon_target:
8900 raise errors.OpExecError("Instance seems to be running on two nodes,"
8901 " or the hypervisor is confused; you will have"
8902 " to ensure manually that it runs only on one"
8903 " and restart this operation")
8905 if not (runningon_source or runningon_target):
8906 raise errors.OpExecError("Instance does not seem to be running at all;"
8907 " in this case it's safer to repair by"
8908 " running 'gnt-instance stop' to ensure disk"
8909 " shutdown, and then restarting it")
8911 if runningon_target:
8912 # the migration has actually succeeded, we need to update the config
8913 self.feedback_fn("* instance running on secondary node (%s),"
8914 " updating config" % target_node)
8915 instance.primary_node = target_node
8916 self.cfg.Update(instance, self.feedback_fn)
8917 demoted_node = source_node
8919 self.feedback_fn("* instance confirmed to be running on its"
8920 " primary node (%s)" % source_node)
8921 demoted_node = target_node
8923 if instance.disk_template in constants.DTS_INT_MIRROR:
8924 self._EnsureSecondary(demoted_node)
8925       try:
8926         self._WaitUntilSync()
8927       except errors.OpExecError:
8928         # we ignore here errors, since if the device is standalone, it
8929         # won't be able to sync
8930         pass
8931       self._GoStandalone()
8932 self._GoReconnect(False)
8933 self._WaitUntilSync()
8935 self.feedback_fn("* done")
8937 def _RevertDiskStatus(self):
8938 """Try to revert the disk status after a failed migration.
8941 target_node = self.target_node
8942     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8943       return
8945     try:
8946       self._EnsureSecondary(target_node)
8947 self._GoStandalone()
8948 self._GoReconnect(False)
8949 self._WaitUntilSync()
8950 except errors.OpExecError, err:
8951 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8952 " please try to recover the instance manually;"
8953 " error '%s'" % str(err))
8955 def _AbortMigration(self):
8956 """Call the hypervisor code to abort a started migration.
8959 instance = self.instance
8960 target_node = self.target_node
8961 source_node = self.source_node
8962 migration_info = self.migration_info
8964     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8965                                                                  instance,
8966                                                                  migration_info,
8967                                                                  False)
8968     abort_msg = abort_result.fail_msg
8969     if abort_msg:
8970       logging.error("Aborting migration failed on target node %s: %s",
8971                     target_node, abort_msg)
8972       # Don't raise an exception here, as we still have to try to revert the
8973 # disk status, even if this step failed.
8975 abort_result = self.rpc.call_instance_finalize_migration_src(
8976 source_node, instance, False, self.live)
8977     abort_msg = abort_result.fail_msg
8978     if abort_msg:
8979       logging.error("Aborting migration failed on source node %s: %s",
8980 source_node, abort_msg)
8982 def _ExecMigration(self):
8983 """Migrate an instance.
8985 The migrate is done by:
8986 - change the disks into dual-master mode
8987 - wait until disks are fully synchronized again
8988 - migrate the instance
8989 - change disks on the new secondary node (the old primary) to secondary
8990 - wait until disks are fully synchronized
8991 - change disks into single-master mode
8994 instance = self.instance
8995 target_node = self.target_node
8996 source_node = self.source_node
8998 # Check for hypervisor version mismatch and warn the user.
8999 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9000 None, [self.instance.hypervisor], False)
9001 for ninfo in nodeinfo.values():
9002       ninfo.Raise("Unable to retrieve node information from node '%s'" %
9003                   ninfo.node)
9004 (_, _, (src_info, )) = nodeinfo[source_node].payload
9005 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9007 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9008 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9009 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9010 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9011 if src_version != dst_version:
9012 self.feedback_fn("* warning: hypervisor version mismatch between"
9013 " source (%s) and target (%s) node" %
9014 (src_version, dst_version))
9016 self.feedback_fn("* checking disk consistency between source and target")
9017 for (idx, dev) in enumerate(instance.disks):
9018 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9019 raise errors.OpExecError("Disk %s is degraded or not fully"
9020 " synchronized on target node,"
9021 " aborting migration" % idx)
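    # If the target node cannot hold the instance's current runtime memory,
    # either fail outright (runtime changes not allowed) or balloon the
    # instance down to the target's free memory before starting the transfer.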
9023 if self.current_mem > self.tgt_free_mem:
9024 if not self.allow_runtime_changes:
9025 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9026 " free memory to fit instance %s on target"
9027 " node %s (have %dMB, need %dMB)" %
9028 (instance.name, target_node,
9029 self.tgt_free_mem, self.current_mem))
9030 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9031       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9032                                                      instance,
9033                                                      self.tgt_free_mem)
9034       rpcres.Raise("Cannot modify instance runtime memory")
9036 # First get the migration information from the remote node
9037 result = self.rpc.call_migration_info(source_node, instance)
9038     msg = result.fail_msg
9039     if msg:
9040       log_err = ("Failed fetching source migration information from %s: %s" %
9041                  (source_node, msg))
9042       logging.error(log_err)
9043 raise errors.OpExecError(log_err)
9045 self.migration_info = migration_info = result.payload
9047 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9048 # Then switch the disks to master/master mode
9049 self._EnsureSecondary(target_node)
9050 self._GoStandalone()
9051 self._GoReconnect(True)
9052 self._WaitUntilSync()
9054 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9055     result = self.rpc.call_accept_instance(target_node,
9056                                            instance,
9057                                            migration_info,
9058                                            self.nodes_ip[target_node])
9060     msg = result.fail_msg
9061     if msg:
9062       logging.error("Instance pre-migration failed, trying to revert"
9063 " disk status: %s", msg)
9064 self.feedback_fn("Pre-migration failed, aborting")
9065 self._AbortMigration()
9066 self._RevertDiskStatus()
9067 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9068 (instance.name, msg))
9070 self.feedback_fn("* migrating instance to %s" % target_node)
9071 result = self.rpc.call_instance_migrate(source_node, instance,
9072                                             self.nodes_ip[target_node],
9073                                             self.live)
9074     msg = result.fail_msg
9075     if msg:
9076       logging.error("Instance migration failed, trying to revert"
9077 " disk status: %s", msg)
9078 self.feedback_fn("Migration failed, aborting")
9079 self._AbortMigration()
9080 self._RevertDiskStatus()
9081 raise errors.OpExecError("Could not migrate instance %s: %s" %
9082 (instance.name, msg))
9084 self.feedback_fn("* starting memory transfer")
9085 last_feedback = time.time()
9086     while True:
9087       result = self.rpc.call_instance_get_migration_status(source_node,
9088                                                            instance)
9089       msg = result.fail_msg
9090 ms = result.payload # MigrationStatus instance
9091 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9092 logging.error("Instance migration failed, trying to revert"
9093 " disk status: %s", msg)
9094 self.feedback_fn("Migration failed, aborting")
9095 self._AbortMigration()
9096         self._RevertDiskStatus()
9097         if not msg:
9098           msg = "hypervisor returned failure"
9099 raise errors.OpExecError("Could not migrate instance %s: %s" %
9100 (instance.name, msg))
9102 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9103         self.feedback_fn("* memory transfer complete")
9104         break
9106 if (utils.TimeoutExpired(last_feedback,
9107 self._MIGRATION_FEEDBACK_INTERVAL) and
9108 ms.transferred_ram is not None):
9109 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9110 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9111 last_feedback = time.time()
9113 time.sleep(self._MIGRATION_POLL_INTERVAL)
9115     result = self.rpc.call_instance_finalize_migration_src(source_node,
9116                                                            instance,
9117                                                            True,
9118                                                            self.live)
9119     msg = result.fail_msg
9120     if msg:
9121       logging.error("Instance migration succeeded, but finalization failed"
9122                     " on the source node: %s", msg)
9123       raise errors.OpExecError("Could not finalize instance migration: %s" %
9124                                msg)
9126 instance.primary_node = target_node
9128 # distribute new instance config to the other nodes
9129 self.cfg.Update(instance, self.feedback_fn)
9131     result = self.rpc.call_instance_finalize_migration_dst(target_node,
9132                                                             instance,
9133                                                             migration_info,
9134                                                             True)
9135     msg = result.fail_msg
9136     if msg:
9137       logging.error("Instance migration succeeded, but finalization failed"
9138                     " on the target node: %s", msg)
9139       raise errors.OpExecError("Could not finalize instance migration: %s" %
9140                                msg)
9142 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9143 self._EnsureSecondary(source_node)
9144 self._WaitUntilSync()
9145 self._GoStandalone()
9146 self._GoReconnect(False)
9147 self._WaitUntilSync()
9149 # If the instance's disk template is `rbd' or `ext' and there was a
9150 # successful migration, unmap the device from the source node.
9151 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9152 disks = _ExpandCheckDisks(instance, instance.disks)
9153 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9154       for disk in disks:
9155         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9156         msg = result.fail_msg
9157         if msg:
9158           logging.error("Migration was successful, but couldn't unmap the"
9159 " block device %s on source node %s: %s",
9160 disk.iv_name, source_node, msg)
9161 logging.error("You need to unmap the device %s manually on %s",
9162 disk.iv_name, source_node)
9164 self.feedback_fn("* done")
9166 def _ExecFailover(self):
9167 """Failover an instance.
9169 The failover is done by shutting it down on its present node and
9170 starting it on the secondary.
9173 instance = self.instance
9174 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9176 source_node = instance.primary_node
9177 target_node = self.target_node
9179 if instance.admin_state == constants.ADMINST_UP:
9180 self.feedback_fn("* checking disk consistency between source and target")
9181 for (idx, dev) in enumerate(instance.disks):
9182 # for drbd, these are drbd over lvm
9183         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9184                                      False):
9185           if primary_node.offline:
9186             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9187                              " target node %s" %
9188                              (primary_node.name, idx, target_node))
9189 elif not self.ignore_consistency:
9190 raise errors.OpExecError("Disk %s is degraded on target node,"
9191 " aborting failover" % idx)
9192     else:
9193       self.feedback_fn("* not checking disk consistency as instance is not"
9194                        " running")
9196 self.feedback_fn("* shutting down instance on source node")
9197 logging.info("Shutting down instance %s on node %s",
9198 instance.name, source_node)
9200 result = self.rpc.call_instance_shutdown(source_node, instance,
9201 self.shutdown_timeout)
9202     msg = result.fail_msg
9203     if msg:
9204       if self.ignore_consistency or primary_node.offline:
9205         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9206                            " proceeding anyway; please make sure node"
9207                            " %s is down; error details: %s",
9208                            instance.name, source_node, source_node, msg)
9209       else:
9210         raise errors.OpExecError("Could not shutdown instance %s on"
9211                                  " node %s: %s" %
9212                                  (instance.name, source_node, msg))
9214 self.feedback_fn("* deactivating the instance's disks on source node")
9215 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9216 raise errors.OpExecError("Can't shut down the instance's disks")
9218 instance.primary_node = target_node
9219 # distribute new instance config to the other nodes
9220 self.cfg.Update(instance, self.feedback_fn)
9222 # Only start the instance if it's marked as up
9223 if instance.admin_state == constants.ADMINST_UP:
9224       self.feedback_fn("* activating the instance's disks on target node %s" %
9225                        target_node)
9226       logging.info("Starting instance %s on node %s",
9227                    instance.name, target_node)
9229       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9230                                            ignore_secondaries=True)
9231       if not disks_ok:
9232         _ShutdownInstanceDisks(self.lu, instance)
9233 raise errors.OpExecError("Can't activate the instance's disks")
9235       self.feedback_fn("* starting the instance on the target node %s" %
9236                        target_node)
9237       result = self.rpc.call_instance_start(target_node, (instance, None, None),
9238                                             False)
9239       msg = result.fail_msg
9240       if msg:
9241         _ShutdownInstanceDisks(self.lu, instance)
9242 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9243 (instance.name, target_node, msg))
9245 def Exec(self, feedback_fn):
9246 """Perform the migration.
9249 self.feedback_fn = feedback_fn
9250 self.source_node = self.instance.primary_node
9252 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9253 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9254 self.target_node = self.instance.secondary_nodes[0]
9255 # Otherwise self.target_node has been populated either
9256 # directly, or through an iallocator.
9258 self.all_nodes = [self.source_node, self.target_node]
9259 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9260 in self.cfg.GetMultiNodeInfo(self.all_nodes))
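    # nodes_ip maps each involved node to its secondary IP; the disk
    # reconnection helpers and the migration RPC calls below are given these
    # addresses.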
9262     if self.failover:
9263       feedback_fn("Failover instance %s" % self.instance.name)
9264       self._ExecFailover()
9265     else:
9266       feedback_fn("Migrating instance %s" % self.instance.name)
9268       if self.cleanup:
9269         return self._ExecCleanup()
9270       else:
9271         return self._ExecMigration()
9274 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9275                     force_open):
9276   """Wrapper around L{_CreateBlockDevInner}.
9278   This method annotates the root device first.
9280   """
9281 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9282 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9283 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9284 force_open, excl_stor)
9287 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9288 info, force_open, excl_stor):
9289 """Create a tree of block devices on a given node.
9291 If this device type has to be created on secondaries, create it and
9294 If not, just recurse to children keeping the same 'force' value.
9296 @attention: The device has to be annotated already.
9298 @param lu: the lu on whose behalf we execute
9299 @param node: the node on which to create the device
9300 @type instance: L{objects.Instance}
9301 @param instance: the instance which owns the device
9302 @type device: L{objects.Disk}
9303 @param device: the device to create
9304 @type force_create: boolean
9305 @param force_create: whether to force creation of this device; this
9306     will be changed to True whenever we find a device which has
9307     the CreateOnSecondary() attribute
9308 @param info: the extra 'metadata' we should attach to the device
9309 (this will be represented as a LVM tag)
9310 @type force_open: boolean
9311 @param force_open: this parameter will be passed to the
9312     L{backend.BlockdevCreate} function where it specifies
9313     whether we run on primary or not, and it affects both
9314     the child assembly and the device's own Open() execution
9315 @type excl_stor: boolean
9316 @param excl_stor: Whether exclusive_storage is active for the node
9319 if device.CreateOnSecondary():
9323 for child in device.children:
9324 _CreateBlockDevInner(lu, node, instance, child, force_create,
9325 info, force_open, excl_stor)
9327 if not force_create:
9330 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9334 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9336 """Create a single block device on a given node.
9338 This will not recurse over children of the device, so they must be
9341 @param lu: the lu on whose behalf we execute
9342 @param node: the node on which to create the device
9343 @type instance: L{objects.Instance}
9344 @param instance: the instance which owns the device
9345 @type device: L{objects.Disk}
9346 @param device: the device to create
9347 @param info: the extra 'metadata' we should attach to the device
9348 (this will be represented as a LVM tag)
9349 @type force_open: boolean
9350 @param force_open: this parameter will be passed to the
9351     L{backend.BlockdevCreate} function where it specifies
9352     whether we run on primary or not, and it affects both
9353     the child assembly and the device's own Open() execution
9354 @type excl_stor: boolean
9355 @param excl_stor: Whether exclusive_storage is active for the node
9358 lu.cfg.SetDiskID(device, node)
9359 result = lu.rpc.call_blockdev_create(node, device, device.size,
9360 instance.name, force_open, info,
9362 result.Raise("Can't create block device %s on"
9363 " node %s for instance %s" % (device, node, instance.name))
9364 if device.physical_id is None:
9365 device.physical_id = result.payload
9368 def _GenerateUniqueNames(lu, exts):
9369 """Generate a suitable LV name.
9371 This will generate a logical volume name for the given instance.
9376 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9377 results.append("%s%s" % (new_id, val))
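# Illustrative sketch (not from the original code): with exts such as
# [".disk0_data", ".disk0_meta"] and a generated unique ID like
# "a1b2c3d4-example" (made-up value), the returned names would be
# "a1b2c3d4-example.disk0_data" and "a1b2c3d4-example.disk0_meta".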
9381 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9382 iv_name, p_minor, s_minor):
9383 """Generate a drbd8 device complete with its children.
9386 assert len(vgnames) == len(names) == 2
9387 port = lu.cfg.AllocatePort()
9388 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9390 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9391 logical_id=(vgnames[0], names[0]),
9393 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9394 size=constants.DRBD_META_SIZE,
9395 logical_id=(vgnames[1], names[1]),
9397 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9398 logical_id=(primary, secondary, port,
9401 children=[dev_data, dev_meta],
9402 iv_name=iv_name, params={})
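# Illustrative sketch (assumed layout, values made up): the returned drbd_dev
# is an LD_DRBD8 disk of the requested size whose two children are the data
# LV (logical_id e.g. ("xenvg", "<name>_data")) and the metadata LV of
# constants.DRBD_META_SIZE (logical_id e.g. ("xenvg", "<name>_meta")).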
9406 _DISK_TEMPLATE_NAME_PREFIX = {
9407 constants.DT_PLAIN: "",
9408 constants.DT_RBD: ".rbd",
9409 constants.DT_EXT: ".ext",
9413 _DISK_TEMPLATE_DEVICE_TYPE = {
9414 constants.DT_PLAIN: constants.LD_LV,
9415 constants.DT_FILE: constants.LD_FILE,
9416 constants.DT_SHARED_FILE: constants.LD_FILE,
9417 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9418 constants.DT_RBD: constants.LD_RBD,
9419 constants.DT_EXT: constants.LD_EXT,
9423 def _GenerateDiskTemplate(
9424 lu, template_name, instance_name, primary_node, secondary_nodes,
9425 disk_info, file_storage_dir, file_driver, base_index,
9426 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9427 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9428 """Generate the entire disk layout for a given template type.
9431 vgname = lu.cfg.GetVGName()
9432 disk_count = len(disk_info)
9435 if template_name == constants.DT_DISKLESS:
9437 elif template_name == constants.DT_DRBD8:
9438 if len(secondary_nodes) != 1:
9439 raise errors.ProgrammerError("Wrong template configuration")
9440 remote_node = secondary_nodes[0]
9441 minors = lu.cfg.AllocateDRBDMinor(
9442 [primary_node, remote_node] * len(disk_info), instance_name)
9444 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9446 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9449 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9450 for i in range(disk_count)]):
9451 names.append(lv_prefix + "_data")
9452 names.append(lv_prefix + "_meta")
9453 for idx, disk in enumerate(disk_info):
9454 disk_index = idx + base_index
9455 data_vg = disk.get(constants.IDISK_VG, vgname)
9456 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9457 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9458 disk[constants.IDISK_SIZE],
9460 names[idx * 2:idx * 2 + 2],
9461 "disk/%d" % disk_index,
9462 minors[idx * 2], minors[idx * 2 + 1])
9463 disk_dev.mode = disk[constants.IDISK_MODE]
9464 disks.append(disk_dev)
9467 raise errors.ProgrammerError("Wrong template configuration")
9469 if template_name == constants.DT_FILE:
9471 elif template_name == constants.DT_SHARED_FILE:
9472 _req_shr_file_storage()
9474 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9475 if name_prefix is None:
9478 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9479 (name_prefix, base_index + i)
9480 for i in range(disk_count)])
9482 if template_name == constants.DT_PLAIN:
9484 def logical_id_fn(idx, _, disk):
9485 vg = disk.get(constants.IDISK_VG, vgname)
9486 return (vg, names[idx])
9488 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9490 lambda _, disk_index, disk: (file_driver,
9491 "%s/disk%d" % (file_storage_dir,
9493 elif template_name == constants.DT_BLOCK:
9495 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9496 disk[constants.IDISK_ADOPT])
9497 elif template_name == constants.DT_RBD:
9498 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9499 elif template_name == constants.DT_EXT:
9500 def logical_id_fn(idx, _, disk):
9501 provider = disk.get(constants.IDISK_PROVIDER, None)
9502 if provider is None:
9503 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9504 " not found", constants.DT_EXT,
9505 constants.IDISK_PROVIDER)
9506 return (provider, names[idx])
9508 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9510 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9512 for idx, disk in enumerate(disk_info):
9514 # Only for the Ext template add disk_info to params
9515 if template_name == constants.DT_EXT:
9516 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9518 if key not in constants.IDISK_PARAMS:
9519 params[key] = disk[key]
9520 disk_index = idx + base_index
9521 size = disk[constants.IDISK_SIZE]
9522 feedback_fn("* disk %s, size %s" %
9523 (disk_index, utils.FormatUnit(size, "h")))
9524 disks.append(objects.Disk(dev_type=dev_type, size=size,
9525 logical_id=logical_id_fn(idx, disk_index, disk),
9526 iv_name="disk/%d" % disk_index,
9527 mode=disk[constants.IDISK_MODE],
9533 def _GetInstanceInfoText(instance):
9534 """Compute that text that should be added to the disk's metadata.
9537 return "originstname+%s" % instance.name
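# Illustrative example: for an instance named "instance1.example.com" the
# returned text (later attached to disks as an LVM tag) is
# "originstname+instance1.example.com".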
9540 def _CalcEta(time_taken, written, total_size):
9541 """Calculates the ETA based on size written and total size.
9543 @param time_taken: The time taken so far
9544 @param written: amount written so far
9545 @param total_size: The total size of data to be written
9546 @return: The remaining time in seconds
9549 avg_time = time_taken / float(written)
9550 return (total_size - written) * avg_time
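# Worked example (illustrative only): if 300 seconds were spent writing
# 1024 MiB out of 4096 MiB, avg_time is 300 / 1024.0 ~ 0.29 s/MiB, so the
# estimated remaining time is (4096 - 1024) * 0.29 ~ 900 seconds.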
9553 def _WipeDisks(lu, instance, disks=None):
9554 """Wipes instance disks.
9556 @type lu: L{LogicalUnit}
9557 @param lu: the logical unit on whose behalf we execute
9558 @type instance: L{objects.Instance}
9559 @param instance: the instance whose disks we should wipe
9560 @return: the success of the wipe
9563 node = instance.primary_node
9566 disks = [(idx, disk, 0)
9567 for (idx, disk) in enumerate(instance.disks)]
9569 for (_, device, _) in disks:
9570 lu.cfg.SetDiskID(device, node)
9572 logging.info("Pausing synchronization of disks of instance '%s'",
9574 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9575 (map(compat.snd, disks),
9578 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9580 for idx, success in enumerate(result.payload):
9582 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9583 " failed", idx, instance.name)
9586 for (idx, device, offset) in disks:
9587 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9588 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9590 int(min(constants.MAX_WIPE_CHUNK,
9591 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
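    # Worked example (illustrative, assuming MIN_WIPE_CHUNK_PERCENT is 10 and
    # MAX_WIPE_CHUNK is 1024 MiB): a 2048 MiB disk is wiped in chunks of
    # int(min(1024, 2048 * 0.10)) = 204 MiB, while a 20480 MiB disk is capped
    # at 1024 MiB per chunk.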
9595 start_time = time.time()
9600 info_text = (" (from %s to %s)" %
9601 (utils.FormatUnit(offset, "h"),
9602 utils.FormatUnit(size, "h")))
9604 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9606 logging.info("Wiping disk %d for instance %s on node %s using"
9607 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9609 while offset < size:
9610 wipe_size = min(wipe_chunk_size, size - offset)
9612 logging.debug("Wiping disk %d, offset %s, chunk %s",
9613 idx, offset, wipe_size)
9615 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9617 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9618 (idx, offset, wipe_size))
9622 if now - last_output >= 60:
9623 eta = _CalcEta(now - start_time, offset, size)
9624 lu.LogInfo(" - done: %.1f%% ETA: %s",
9625 offset / float(size) * 100, utils.FormatSeconds(eta))
9628 logging.info("Resuming synchronization of disks for instance '%s'",
9631 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9632 (map(compat.snd, disks),
9637 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9638 node, result.fail_msg)
9640 for idx, success in enumerate(result.payload):
9642 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9643 " failed", idx, instance.name)
9646 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9647 """Create all disks for an instance.
9649 This abstracts away some work from AddInstance.
9651 @type lu: L{LogicalUnit}
9652 @param lu: the logical unit on whose behalf we execute
9653 @type instance: L{objects.Instance}
9654 @param instance: the instance whose disks we should create
9656 @param to_skip: list of indices to skip
9657 @type target_node: string
9658 @param target_node: if passed, overrides the target node for creation
9660 @return: the success of the creation
9663 info = _GetInstanceInfoText(instance)
9664 if target_node is None:
9665 pnode = instance.primary_node
9666 all_nodes = instance.all_nodes
9671 if instance.disk_template in constants.DTS_FILEBASED:
9672 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9673 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9675 result.Raise("Failed to create directory '%s' on"
9676 " node %s" % (file_storage_dir, pnode))
9678 # Note: this needs to be kept in sync with adding of disks in
9679 # LUInstanceSetParams
9680 for idx, device in enumerate(instance.disks):
9681 if to_skip and idx in to_skip:
9683 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9685 for node in all_nodes:
9686 f_create = node == pnode
9687 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9690 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9691 """Remove all disks for an instance.
9693 This abstracts away some work from `AddInstance()` and
9694 `RemoveInstance()`. Note that in case some of the devices couldn't
9695 be removed, the removal will continue with the other ones (compare
9696 with `_CreateDisks()`).
9698 @type lu: L{LogicalUnit}
9699 @param lu: the logical unit on whose behalf we execute
9700 @type instance: L{objects.Instance}
9701 @param instance: the instance whose disks we should remove
9702 @type target_node: string
9703 @param target_node: used to override the node on which to remove the disks
9705 @return: the success of the removal
9708 logging.info("Removing block devices for instance %s", instance.name)
9711 ports_to_release = set()
9712 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9713 for (idx, device) in enumerate(anno_disks):
9715 edata = [(target_node, device)]
9717 edata = device.ComputeNodeTree(instance.primary_node)
9718 for node, disk in edata:
9719 lu.cfg.SetDiskID(disk, node)
9720 result = lu.rpc.call_blockdev_remove(node, disk)
9722 lu.LogWarning("Could not remove disk %s on node %s,"
9723 " continuing anyway: %s", idx, node, result.fail_msg)
9724 if not (result.offline and node != instance.primary_node):
9727 # if this is a DRBD disk, return its port to the pool
9728 if device.dev_type in constants.LDS_DRBD:
9729 ports_to_release.add(device.logical_id[2])
9731 if all_result or ignore_failures:
9732 for port in ports_to_release:
9733 lu.cfg.AddTcpUdpPort(port)
9735 if instance.disk_template in constants.DTS_FILEBASED:
9736 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9740 tgt = instance.primary_node
9741 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9743 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9744 file_storage_dir, instance.primary_node, result.fail_msg)
9750 def _ComputeDiskSizePerVG(disk_template, disks):
9751 """Compute disk size requirements in the volume group
9754 def _compute(disks, payload):
9755 """Universal algorithm.
9760 vgs[disk[constants.IDISK_VG]] = \
9761         vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9765 # Required free disk space as a function of disk and swap space
9767 constants.DT_DISKLESS: {},
9768 constants.DT_PLAIN: _compute(disks, 0),
9769 # 128 MB are added for drbd metadata for each disk
9770 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9771 constants.DT_FILE: {},
9772 constants.DT_SHARED_FILE: {},
9775 if disk_template not in req_size_dict:
9776 raise errors.ProgrammerError("Disk template '%s' size requirement"
9777 " is unknown" % disk_template)
9779 return req_size_dict[disk_template]
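# Illustrative sketch (values made up): for two DRBD8 disks of 1024 MiB and
# 2048 MiB, both in volume group "xenvg", the result is a single-entry dict
# roughly of the form {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE},
# i.e. one entry per volume group with the per-disk metadata overhead added.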
9782 def _FilterVmNodes(lu, nodenames):
9783 """Filters out non-vm_capable nodes from a list.
9785 @type lu: L{LogicalUnit}
9786 @param lu: the logical unit for which we check
9787 @type nodenames: list
9788 @param nodenames: the list of nodes on which we should check
9790 @return: the list of vm-capable nodes
9793 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9794 return [name for name in nodenames if name not in vm_nodes]
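# Illustrative example: if "node3" is the only non-vm_capable node in the
# cluster, _FilterVmNodes(lu, ["node1", "node2", "node3"]) returns
# ["node1", "node2"].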
9797 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9798 """Hypervisor parameter validation.
9800 This function abstracts the hypervisor parameter validation to be
9801 used in both instance create and instance modify.
9803 @type lu: L{LogicalUnit}
9804 @param lu: the logical unit for which we check
9805 @type nodenames: list
9806 @param nodenames: the list of nodes on which we should check
9807 @type hvname: string
9808 @param hvname: the name of the hypervisor we should use
9809 @type hvparams: dict
9810 @param hvparams: the parameters which we need to check
9811 @raise errors.OpPrereqError: if the parameters are not valid
9814 nodenames = _FilterVmNodes(lu, nodenames)
9816 cluster = lu.cfg.GetClusterInfo()
9817 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9819 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9820 for node in nodenames:
9824 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9827 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9828 """OS parameters validation.
9830 @type lu: L{LogicalUnit}
9831 @param lu: the logical unit for which we check
9832 @type required: boolean
9833 @param required: whether the validation should fail if the OS is not found
9835 @type nodenames: list
9836 @param nodenames: the list of nodes on which we should check
9837 @type osname: string
9838 @param osname: the name of the OS we should use
9839 @type osparams: dict
9840 @param osparams: the parameters which we need to check
9841 @raise errors.OpPrereqError: if the parameters are not valid
9844 nodenames = _FilterVmNodes(lu, nodenames)
9845 result = lu.rpc.call_os_validate(nodenames, required, osname,
9846 [constants.OS_VALIDATE_PARAMETERS],
9848 for node, nres in result.items():
9849 # we don't check for offline cases since this should be run only
9850 # against the master node and/or an instance's nodes
9851 nres.Raise("OS Parameters validation failed on node %s" % node)
9852 if not nres.payload:
9853 lu.LogInfo("OS %s not found on node %s, validation skipped",
9857 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9858 """Wrapper around IAReqInstanceAlloc.
9860 @param op: The instance opcode
9861 @param disks: The computed disks
9862 @param nics: The computed nics
9863 @param beparams: The fully filled beparams
9864 @param node_whitelist: List of nodes which should appear as online to the
9865 allocator (unless the node is already marked offline)
9867 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9870 spindle_use = beparams[constants.BE_SPINDLE_USE]
9871 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9872 disk_template=op.disk_template,
9875 vcpus=beparams[constants.BE_VCPUS],
9876 memory=beparams[constants.BE_MAXMEM],
9877 spindle_use=spindle_use,
9879 nics=[n.ToDict() for n in nics],
9880 hypervisor=op.hypervisor,
9881 node_whitelist=node_whitelist)
9884 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9885 """Computes the nics.
9887 @param op: The instance opcode
9888 @param cluster: Cluster configuration object
9889 @param default_ip: The default ip to assign
9890 @param cfg: An instance of the configuration object
9891 @param ec_id: Execution context ID
9893 @returns: The built-up NICs
9898 nic_mode_req = nic.get(constants.INIC_MODE, None)
9899 nic_mode = nic_mode_req
9900 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9901 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9903 net = nic.get(constants.INIC_NETWORK, None)
9904 link = nic.get(constants.NIC_LINK, None)
9905 ip = nic.get(constants.INIC_IP, None)
9907 if net is None or net.lower() == constants.VALUE_NONE:
9910 if nic_mode_req is not None or link is not None:
9911 raise errors.OpPrereqError("If network is given, no mode or link"
9912 " is allowed to be passed",
9915 # ip validity checks
9916 if ip is None or ip.lower() == constants.VALUE_NONE:
9918 elif ip.lower() == constants.VALUE_AUTO:
9919 if not op.name_check:
9920 raise errors.OpPrereqError("IP address set to auto but name checks"
9921 " have been skipped",
9925 # We defer pool operations until later, so that the iallocator has
9926       # filled in the instance's node(s)
9927 if ip.lower() == constants.NIC_IP_POOL:
9929 raise errors.OpPrereqError("if ip=pool, parameter network"
9930 " must be passed too",
9933 elif not netutils.IPAddress.IsValid(ip):
9934 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9939 # TODO: check the ip address for uniqueness
9940 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9941 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9944 # MAC address verification
9945 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9946 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9947 mac = utils.NormalizeAndValidateMac(mac)
9950 # TODO: We need to factor this out
9951 cfg.ReserveMAC(mac, ec_id)
9952 except errors.ReservationError:
9953 raise errors.OpPrereqError("MAC address %s already in use"
9954 " in cluster" % mac,
9955 errors.ECODE_NOTUNIQUE)
9957 # Build nic parameters
9960 nicparams[constants.NIC_MODE] = nic_mode
9962 nicparams[constants.NIC_LINK] = link
9964 check_params = cluster.SimpleFillNIC(nicparams)
9965 objects.NIC.CheckParameterSyntax(check_params)
9966 net_uuid = cfg.LookupNetwork(net)
9967 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9968 network=net_uuid, nicparams=nicparams))
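# Illustrative sketch (assumed input): a nic spec such as
# {constants.INIC_MAC: constants.VALUE_AUTO} inherits mode and link from the
# cluster-level nicparams defaults and ends up as an objects.NIC whose MAC is
# still "auto" at this point; the real MAC is only generated later, in
# CheckPrereq of LUInstanceCreate.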
9973 def _ComputeDisks(op, default_vg):
9974 """Computes the instance disks.
9976 @param op: The instance opcode
9977 @param default_vg: The default_vg to assume
9979 @return: The computed disks
9983 for disk in op.disks:
9984 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9985 if mode not in constants.DISK_ACCESS_SET:
9986 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9987 mode, errors.ECODE_INVAL)
9988 size = disk.get(constants.IDISK_SIZE, None)
9990 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9993 except (TypeError, ValueError):
9994 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9997 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9998 if ext_provider and op.disk_template != constants.DT_EXT:
9999 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10000 " disk template, not %s" %
10001 (constants.IDISK_PROVIDER, constants.DT_EXT,
10002 op.disk_template), errors.ECODE_INVAL)
10004 data_vg = disk.get(constants.IDISK_VG, default_vg)
10006 constants.IDISK_SIZE: size,
10007 constants.IDISK_MODE: mode,
10008 constants.IDISK_VG: data_vg,
10011 if constants.IDISK_METAVG in disk:
10012 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10013 if constants.IDISK_ADOPT in disk:
10014 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10016 # For extstorage, demand the `provider' option and add any
10017 # additional parameters (ext-params) to the dict
10018 if op.disk_template == constants.DT_EXT:
10020 new_disk[constants.IDISK_PROVIDER] = ext_provider
10022 if key not in constants.IDISK_PARAMS:
10023 new_disk[key] = disk[key]
10025 raise errors.OpPrereqError("Missing provider for template '%s'" %
10026 constants.DT_EXT, errors.ECODE_INVAL)
10028 disks.append(new_disk)
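# Illustrative sketch (assumed input): a disk spec such as
# {constants.IDISK_SIZE: 10240} becomes {IDISK_SIZE: 10240,
# IDISK_MODE: constants.DISK_RDWR (the default), IDISK_VG: default_vg};
# METAVG, ADOPT and extstorage provider entries are only copied over when
# present in the input spec.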
10033 def _ComputeFullBeParams(op, cluster):
10034 """Computes the full beparams.
10036 @param op: The instance opcode
10037 @param cluster: The cluster config object
10039 @return: The fully filled beparams
10042 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10043 for param, value in op.beparams.iteritems():
10044 if value == constants.VALUE_AUTO:
10045 op.beparams[param] = default_beparams[param]
10046 objects.UpgradeBeParams(op.beparams)
10047 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10048 return cluster.SimpleFillBE(op.beparams)
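# Illustrative sketch (assumed values): with op.beparams containing
# {constants.BE_VCPUS: constants.VALUE_AUTO}, the VCPU count is first replaced
# by the cluster default and SimpleFillBE() then returns a dict with every
# backend parameter (e.g. BE_MAXMEM, BE_MINMEM, BE_SPINDLE_USE) filled in.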
10051 def _CheckOpportunisticLocking(op):
10052 """Generate error if opportunistic locking is not possible.
10055 if op.opportunistic_locking and not op.iallocator:
10056 raise errors.OpPrereqError("Opportunistic locking is only available in"
10057 " combination with an instance allocator",
10058 errors.ECODE_INVAL)
10061 class LUInstanceCreate(LogicalUnit):
10062 """Create an instance.
10065 HPATH = "instance-add"
10066 HTYPE = constants.HTYPE_INSTANCE
10069 def CheckArguments(self):
10070 """Check arguments.
10073 # do not require name_check to ease forward/backward compatibility
10075 if self.op.no_install and self.op.start:
10076 self.LogInfo("No-installation mode selected, disabling startup")
10077 self.op.start = False
10078 # validate/normalize the instance name
10079 self.op.instance_name = \
10080 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10082 if self.op.ip_check and not self.op.name_check:
10083 # TODO: make the ip check more flexible and not depend on the name check
10084 raise errors.OpPrereqError("Cannot do IP address check without a name"
10085 " check", errors.ECODE_INVAL)
10087 # check nics' parameter names
10088 for nic in self.op.nics:
10089 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10091 # check disks. parameter names and consistent adopt/no-adopt strategy
10092 has_adopt = has_no_adopt = False
10093 for disk in self.op.disks:
10094 if self.op.disk_template != constants.DT_EXT:
10095 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10096 if constants.IDISK_ADOPT in disk:
10099 has_no_adopt = True
10100 if has_adopt and has_no_adopt:
10101 raise errors.OpPrereqError("Either all disks are adopted or none is",
10102 errors.ECODE_INVAL)
10104 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10105 raise errors.OpPrereqError("Disk adoption is not supported for the"
10106 " '%s' disk template" %
10107 self.op.disk_template,
10108 errors.ECODE_INVAL)
10109 if self.op.iallocator is not None:
10110 raise errors.OpPrereqError("Disk adoption not allowed with an"
10111 " iallocator script", errors.ECODE_INVAL)
10112 if self.op.mode == constants.INSTANCE_IMPORT:
10113 raise errors.OpPrereqError("Disk adoption not allowed for"
10114 " instance import", errors.ECODE_INVAL)
10116 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10117 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10118 " but no 'adopt' parameter given" %
10119 self.op.disk_template,
10120 errors.ECODE_INVAL)
10122 self.adopt_disks = has_adopt
10124 # instance name verification
10125 if self.op.name_check:
10126 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10127 self.op.instance_name = self.hostname1.name
10128 # used in CheckPrereq for ip ping check
10129 self.check_ip = self.hostname1.ip
10131 self.check_ip = None
10133 # file storage checks
10134 if (self.op.file_driver and
10135 not self.op.file_driver in constants.FILE_DRIVER):
10136 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10137 self.op.file_driver, errors.ECODE_INVAL)
10139 if self.op.disk_template == constants.DT_FILE:
10140 opcodes.RequireFileStorage()
10141 elif self.op.disk_template == constants.DT_SHARED_FILE:
10142 opcodes.RequireSharedFileStorage()
10144 ### Node/iallocator related checks
10145 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10147 if self.op.pnode is not None:
10148 if self.op.disk_template in constants.DTS_INT_MIRROR:
10149 if self.op.snode is None:
10150 raise errors.OpPrereqError("The networked disk templates need"
10151 " a mirror node", errors.ECODE_INVAL)
10152 elif self.op.snode:
10153 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10155 self.op.snode = None
10157 _CheckOpportunisticLocking(self.op)
10159 self._cds = _GetClusterDomainSecret()
10161 if self.op.mode == constants.INSTANCE_IMPORT:
10162 # On import force_variant must be True, because if we forced it at
10163 # initial install, our only chance when importing it back is that it
10165 self.op.force_variant = True
10167 if self.op.no_install:
10168 self.LogInfo("No-installation mode has no effect during import")
10170 elif self.op.mode == constants.INSTANCE_CREATE:
10171 if self.op.os_type is None:
10172 raise errors.OpPrereqError("No guest OS specified",
10173 errors.ECODE_INVAL)
10174 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10175 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10176 " installation" % self.op.os_type,
10177 errors.ECODE_STATE)
10178 if self.op.disk_template is None:
10179 raise errors.OpPrereqError("No disk template specified",
10180 errors.ECODE_INVAL)
10182 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10183 # Check handshake to ensure both clusters have the same domain secret
10184 src_handshake = self.op.source_handshake
10185 if not src_handshake:
10186 raise errors.OpPrereqError("Missing source handshake",
10187 errors.ECODE_INVAL)
10189 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10192 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10193 errors.ECODE_INVAL)
10195 # Load and check source CA
10196 self.source_x509_ca_pem = self.op.source_x509_ca
10197 if not self.source_x509_ca_pem:
10198 raise errors.OpPrereqError("Missing source X509 CA",
10199 errors.ECODE_INVAL)
10202 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10204 except OpenSSL.crypto.Error, err:
10205 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10206 (err, ), errors.ECODE_INVAL)
10208 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10209 if errcode is not None:
10210 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10211 errors.ECODE_INVAL)
10213 self.source_x509_ca = cert
10215 src_instance_name = self.op.source_instance_name
10216 if not src_instance_name:
10217 raise errors.OpPrereqError("Missing source instance name",
10218 errors.ECODE_INVAL)
10220 self.source_instance_name = \
10221 netutils.GetHostname(name=src_instance_name).name
10224 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10225 self.op.mode, errors.ECODE_INVAL)
10227 def ExpandNames(self):
10228 """ExpandNames for CreateInstance.
10230 Figure out the right locks for instance creation.
10233 self.needed_locks = {}
10235 instance_name = self.op.instance_name
10236 # this is just a preventive check, but someone might still add this
10237 # instance in the meantime, and creation will fail at lock-add time
10238 if instance_name in self.cfg.GetInstanceList():
10239 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10240 instance_name, errors.ECODE_EXISTS)
10242 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10244 if self.op.iallocator:
10245 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10246 # specifying a group on instance creation and then selecting nodes from
10248 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10249 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10251 if self.op.opportunistic_locking:
10252 self.opportunistic_locks[locking.LEVEL_NODE] = True
10253 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10255 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10256 nodelist = [self.op.pnode]
10257 if self.op.snode is not None:
10258 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10259 nodelist.append(self.op.snode)
10260 self.needed_locks[locking.LEVEL_NODE] = nodelist
10262 # in case of import lock the source node too
10263 if self.op.mode == constants.INSTANCE_IMPORT:
10264 src_node = self.op.src_node
10265 src_path = self.op.src_path
10267 if src_path is None:
10268 self.op.src_path = src_path = self.op.instance_name
10270 if src_node is None:
10271 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10272 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10273 self.op.src_node = None
10274 if os.path.isabs(src_path):
10275 raise errors.OpPrereqError("Importing an instance from a path"
10276 " requires a source node option",
10277 errors.ECODE_INVAL)
10279 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10280 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10281 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10282 if not os.path.isabs(src_path):
10283 self.op.src_path = src_path = \
10284 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10286 self.needed_locks[locking.LEVEL_NODE_RES] = \
10287 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10289 def _RunAllocator(self):
10290 """Run the allocator based on input opcode.
10293 if self.op.opportunistic_locking:
10294 # Only consider nodes for which a lock is held
10295 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10297 node_whitelist = None
10299 #TODO Export network to iallocator so that it chooses a pnode
10300 # in a nodegroup that has the desired network connected to
10301 req = _CreateInstanceAllocRequest(self.op, self.disks,
10302 self.nics, self.be_full,
10304 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10306 ial.Run(self.op.iallocator)
10308 if not ial.success:
10309 # When opportunistic locks are used only a temporary failure is generated
10310 if self.op.opportunistic_locking:
10311 ecode = errors.ECODE_TEMP_NORES
10313 ecode = errors.ECODE_NORES
10315 raise errors.OpPrereqError("Can't compute nodes using"
10316 " iallocator '%s': %s" %
10317 (self.op.iallocator, ial.info),
10320 self.op.pnode = ial.result[0]
10321 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10322 self.op.instance_name, self.op.iallocator,
10323 utils.CommaJoin(ial.result))
10325 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10327 if req.RequiredNodes() == 2:
10328 self.op.snode = ial.result[1]
10330 def BuildHooksEnv(self):
10331 """Build hooks env.
10333 This runs on master, primary and secondary nodes of the instance.
10337 "ADD_MODE": self.op.mode,
10339 if self.op.mode == constants.INSTANCE_IMPORT:
10340 env["SRC_NODE"] = self.op.src_node
10341 env["SRC_PATH"] = self.op.src_path
10342 env["SRC_IMAGES"] = self.src_images
10344 env.update(_BuildInstanceHookEnv(
10345 name=self.op.instance_name,
10346 primary_node=self.op.pnode,
10347 secondary_nodes=self.secondaries,
10348 status=self.op.start,
10349 os_type=self.op.os_type,
10350 minmem=self.be_full[constants.BE_MINMEM],
10351 maxmem=self.be_full[constants.BE_MAXMEM],
10352 vcpus=self.be_full[constants.BE_VCPUS],
10353 nics=_NICListToTuple(self, self.nics),
10354 disk_template=self.op.disk_template,
10355 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10356 for d in self.disks],
10359 hypervisor_name=self.op.hypervisor,
10365 def BuildHooksNodes(self):
10366 """Build hooks nodes.
10369 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10372 def _ReadExportInfo(self):
10373 """Reads the export information from disk.
10375 It will override the opcode source node and path with the actual
10376 information, if these two were not specified before.
10378 @return: the export information
10381 assert self.op.mode == constants.INSTANCE_IMPORT
10383 src_node = self.op.src_node
10384 src_path = self.op.src_path
10386 if src_node is None:
10387 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10388 exp_list = self.rpc.call_export_list(locked_nodes)
10390 for node in exp_list:
10391 if exp_list[node].fail_msg:
10393 if src_path in exp_list[node].payload:
10395 self.op.src_node = src_node = node
10396 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10400 raise errors.OpPrereqError("No export found for relative path %s" %
10401 src_path, errors.ECODE_INVAL)
10403 _CheckNodeOnline(self, src_node)
10404 result = self.rpc.call_export_info(src_node, src_path)
10405 result.Raise("No export or invalid export found in dir %s" % src_path)
10407 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10408 if not export_info.has_section(constants.INISECT_EXP):
10409 raise errors.ProgrammerError("Corrupted export config",
10410 errors.ECODE_ENVIRON)
10412 ei_version = export_info.get(constants.INISECT_EXP, "version")
10413 if (int(ei_version) != constants.EXPORT_VERSION):
10414 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10415 (ei_version, constants.EXPORT_VERSION),
10416 errors.ECODE_ENVIRON)
10419 def _ReadExportParams(self, einfo):
10420 """Use export parameters as defaults.
10422 If the opcode doesn't specify (i.e. override) some instance
10423 parameters, try to use them from the export information, if
10424 that declares them.
10427 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10429 if self.op.disk_template is None:
10430 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10431 self.op.disk_template = einfo.get(constants.INISECT_INS,
10433 if self.op.disk_template not in constants.DISK_TEMPLATES:
10434 raise errors.OpPrereqError("Disk template specified in configuration"
10435 " file is not one of the allowed values:"
10437 " ".join(constants.DISK_TEMPLATES),
10438 errors.ECODE_INVAL)
10440 raise errors.OpPrereqError("No disk template specified and the export"
10441 " is missing the disk_template information",
10442 errors.ECODE_INVAL)
10444 if not self.op.disks:
10446 # TODO: import the disk iv_name too
10447 for idx in range(constants.MAX_DISKS):
10448 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10449 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10450 disks.append({constants.IDISK_SIZE: disk_sz})
10451 self.op.disks = disks
10452 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10453 raise errors.OpPrereqError("No disk info specified and the export"
10454 " is missing the disk information",
10455 errors.ECODE_INVAL)
10457 if not self.op.nics:
10459 for idx in range(constants.MAX_NICS):
10460 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10462 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10463 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10468 self.op.nics = nics
10470 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10471 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10473 if (self.op.hypervisor is None and
10474 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10475 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10477 if einfo.has_section(constants.INISECT_HYP):
10478 # use the export parameters but do not override the ones
10479 # specified by the user
10480 for name, value in einfo.items(constants.INISECT_HYP):
10481 if name not in self.op.hvparams:
10482 self.op.hvparams[name] = value
10484 if einfo.has_section(constants.INISECT_BEP):
10485 # use the parameters, without overriding
10486 for name, value in einfo.items(constants.INISECT_BEP):
10487 if name not in self.op.beparams:
10488 self.op.beparams[name] = value
10489 # Compatibility for the old "memory" be param
10490 if name == constants.BE_MEMORY:
10491 if constants.BE_MAXMEM not in self.op.beparams:
10492 self.op.beparams[constants.BE_MAXMEM] = value
10493 if constants.BE_MINMEM not in self.op.beparams:
10494 self.op.beparams[constants.BE_MINMEM] = value
10496 # try to read the parameters old style, from the main section
10497 for name in constants.BES_PARAMETERS:
10498 if (name not in self.op.beparams and
10499 einfo.has_option(constants.INISECT_INS, name)):
10500 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10502 if einfo.has_section(constants.INISECT_OSP):
10503 # use the parameters, without overriding
10504 for name, value in einfo.items(constants.INISECT_OSP):
10505 if name not in self.op.osparams:
10506 self.op.osparams[name] = value
10508 def _RevertToDefaults(self, cluster):
10509 """Revert the instance parameters to the default values.
10513 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10514 for name in self.op.hvparams.keys():
10515 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10516 del self.op.hvparams[name]
10518 be_defs = cluster.SimpleFillBE({})
10519 for name in self.op.beparams.keys():
10520 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10521 del self.op.beparams[name]
10523 nic_defs = cluster.SimpleFillNIC({})
10524 for nic in self.op.nics:
10525 for name in constants.NICS_PARAMETERS:
10526 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10529 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10530 for name in self.op.osparams.keys():
10531 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10532 del self.op.osparams[name]
10534 def _CalculateFileStorageDir(self):
10535 """Calculate final instance file storage dir.
10538 # file storage dir calculation/check
10539 self.instance_file_storage_dir = None
10540 if self.op.disk_template in constants.DTS_FILEBASED:
10541 # build the full file storage dir path
10544 if self.op.disk_template == constants.DT_SHARED_FILE:
10545 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10547 get_fsd_fn = self.cfg.GetFileStorageDir
10549 cfg_storagedir = get_fsd_fn()
10550 if not cfg_storagedir:
10551 raise errors.OpPrereqError("Cluster file storage dir not defined",
10552 errors.ECODE_STATE)
10553 joinargs.append(cfg_storagedir)
10555 if self.op.file_storage_dir is not None:
10556 joinargs.append(self.op.file_storage_dir)
10558 joinargs.append(self.op.instance_name)
10560 # pylint: disable=W0142
10561 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10563 def CheckPrereq(self): # pylint: disable=R0914
10564 """Check prerequisites.
10567 self._CalculateFileStorageDir()
10569 if self.op.mode == constants.INSTANCE_IMPORT:
10570 export_info = self._ReadExportInfo()
10571 self._ReadExportParams(export_info)
10572 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10574 self._old_instance_name = None
10576 if (not self.cfg.GetVGName() and
10577 self.op.disk_template not in constants.DTS_NOT_LVM):
10578 raise errors.OpPrereqError("Cluster does not support lvm-based"
10579 " instances", errors.ECODE_STATE)
10581 if (self.op.hypervisor is None or
10582 self.op.hypervisor == constants.VALUE_AUTO):
10583 self.op.hypervisor = self.cfg.GetHypervisorType()
10585 cluster = self.cfg.GetClusterInfo()
10586 enabled_hvs = cluster.enabled_hypervisors
10587 if self.op.hypervisor not in enabled_hvs:
10588 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10590 (self.op.hypervisor, ",".join(enabled_hvs)),
10591 errors.ECODE_STATE)
10593 # Check tag validity
10594 for tag in self.op.tags:
10595 objects.TaggableObject.ValidateTag(tag)
10597 # check hypervisor parameter syntax (locally)
10598 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10599 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10601 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10602 hv_type.CheckParameterSyntax(filled_hvp)
10603 self.hv_full = filled_hvp
10604 # check that we don't specify global parameters on an instance
10605 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10606 "instance", "cluster")
10608 # fill and remember the beparams dict
10609 self.be_full = _ComputeFullBeParams(self.op, cluster)
10611 # build os parameters
10612 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10614 # now that hvp/bep are in final format, let's reset to defaults,
10616 if self.op.identify_defaults:
10617 self._RevertToDefaults(cluster)
10620 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10621 self.proc.GetECId())
10623 # disk checks/pre-build
10624 default_vg = self.cfg.GetVGName()
10625 self.disks = _ComputeDisks(self.op, default_vg)
10627 if self.op.mode == constants.INSTANCE_IMPORT:
10629 for idx in range(len(self.disks)):
10630 option = "disk%d_dump" % idx
10631 if export_info.has_option(constants.INISECT_INS, option):
10632 # FIXME: are the old OSes, disk sizes, etc. useful?
10633 export_name = export_info.get(constants.INISECT_INS, option)
10634 image = utils.PathJoin(self.op.src_path, export_name)
10635 disk_images.append(image)
10637 disk_images.append(False)
10639 self.src_images = disk_images
10641 if self.op.instance_name == self._old_instance_name:
10642 for idx, nic in enumerate(self.nics):
10643 if nic.mac == constants.VALUE_AUTO:
10644 nic_mac_ini = "nic%d_mac" % idx
10645 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10647 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10649 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10650 if self.op.ip_check:
10651 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10652 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10653 (self.check_ip, self.op.instance_name),
10654 errors.ECODE_NOTUNIQUE)
10656 #### mac address generation
10657 # By generating here the mac address both the allocator and the hooks get
10658 # the real final mac address rather than the 'auto' or 'generate' value.
10659 # There is a race condition between the generation and the instance object
10660 # creation, which means that we know the mac is valid now, but we're not
10661 # sure it will be when we actually add the instance. If things go bad
10662 # adding the instance will abort because of a duplicate mac, and the
10663 # creation job will fail.
10664 for nic in self.nics:
10665 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10666 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10670 if self.op.iallocator is not None:
10671 self._RunAllocator()
10673 # Release all unneeded node locks
10674 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10675 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10676 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10677 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10679 assert (self.owned_locks(locking.LEVEL_NODE) ==
10680 self.owned_locks(locking.LEVEL_NODE_RES)), \
10681 "Node locks differ from node resource locks"
10683 #### node related checks
10685 # check primary node
10686 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10687 assert self.pnode is not None, \
10688 "Cannot retrieve locked node %s" % self.op.pnode
10690 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10691 pnode.name, errors.ECODE_STATE)
10693 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10694 pnode.name, errors.ECODE_STATE)
10695 if not pnode.vm_capable:
10696 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10697 " '%s'" % pnode.name, errors.ECODE_STATE)
10699 self.secondaries = []
10701 # Fill in any IPs from IP pools. This must happen here, because we need to
10702 # know the nic's primary node, as specified by the iallocator
10703 for idx, nic in enumerate(self.nics):
10704 net_uuid = nic.network
10705 if net_uuid is not None:
10706 nobj = self.cfg.GetNetwork(net_uuid)
10707 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10708 if netparams is None:
10709 raise errors.OpPrereqError("No netparams found for network"
10710                                      " %s. Probably not connected to"
10711                                      " node %s's nodegroup" %
10712 (nobj.name, self.pnode.name),
10713 errors.ECODE_INVAL)
10714 self.LogInfo("NIC/%d inherits netparams %s" %
10715 (idx, netparams.values()))
10716 nic.nicparams = dict(netparams)
10717 if nic.ip is not None:
10718 if nic.ip.lower() == constants.NIC_IP_POOL:
10720 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10721 except errors.ReservationError:
10722 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10723 " from the address pool" % idx,
10724 errors.ECODE_STATE)
10725 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10728 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10729 except errors.ReservationError:
10730 raise errors.OpPrereqError("IP address %s already in use"
10731 " or does not belong to network %s" %
10732 (nic.ip, nobj.name),
10733 errors.ECODE_NOTUNIQUE)
10735 # net is None, ip None or given
10736 elif self.op.conflicts_check:
10737 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10739 # mirror node verification
10740 if self.op.disk_template in constants.DTS_INT_MIRROR:
10741 if self.op.snode == pnode.name:
10742 raise errors.OpPrereqError("The secondary node cannot be the"
10743 " primary node", errors.ECODE_INVAL)
10744 _CheckNodeOnline(self, self.op.snode)
10745 _CheckNodeNotDrained(self, self.op.snode)
10746 _CheckNodeVmCapable(self, self.op.snode)
10747 self.secondaries.append(self.op.snode)
10749 snode = self.cfg.GetNodeInfo(self.op.snode)
10750 if pnode.group != snode.group:
10751 self.LogWarning("The primary and secondary nodes are in two"
10752 " different node groups; the disk parameters"
10753 " from the first disk's node group will be"
10756 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10758 if self.op.disk_template in constants.DTS_INT_MIRROR:
10759 nodes.append(snode)
10760 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10761 if compat.any(map(has_es, nodes)):
10762 raise errors.OpPrereqError("Disk template %s not supported with"
10763 " exclusive storage" % self.op.disk_template,
10764 errors.ECODE_STATE)
10766 nodenames = [pnode.name] + self.secondaries
10768 if not self.adopt_disks:
10769 if self.op.disk_template == constants.DT_RBD:
10770 # _CheckRADOSFreeSpace() is just a placeholder.
10771 # Any function that checks prerequisites can be placed here.
10772 # Check if there is enough space on the RADOS cluster.
10773 _CheckRADOSFreeSpace()
10774 elif self.op.disk_template == constants.DT_EXT:
10775 # FIXME: Function that checks prereqs if needed
10778 # Check lv size requirements, if not adopting
10779 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10780 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10782 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10783 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10784 disk[constants.IDISK_ADOPT])
10785 for disk in self.disks])
10786 if len(all_lvs) != len(self.disks):
10787 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10788 errors.ECODE_INVAL)
10789 for lv_name in all_lvs:
10791 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10792 # to ReserveLV use the same syntax
10793 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10794 except errors.ReservationError:
10795 raise errors.OpPrereqError("LV named %s used by another instance" %
10796 lv_name, errors.ECODE_NOTUNIQUE)
10798 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10799 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10801 node_lvs = self.rpc.call_lv_list([pnode.name],
10802 vg_names.payload.keys())[pnode.name]
10803 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10804 node_lvs = node_lvs.payload
10806 delta = all_lvs.difference(node_lvs.keys())
10808 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10809 utils.CommaJoin(delta),
10810 errors.ECODE_INVAL)
10811 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10813 raise errors.OpPrereqError("Online logical volumes found, cannot"
10814 " adopt: %s" % utils.CommaJoin(online_lvs),
10815 errors.ECODE_STATE)
10816 # update the size of disk based on what is found
10817 for dsk in self.disks:
10818 dsk[constants.IDISK_SIZE] = \
10819 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10820 dsk[constants.IDISK_ADOPT])][0]))
10822 elif self.op.disk_template == constants.DT_BLOCK:
10823 # Normalize and de-duplicate device paths
10824 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10825 for disk in self.disks])
10826 if len(all_disks) != len(self.disks):
10827 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10828 errors.ECODE_INVAL)
10829 baddisks = [d for d in all_disks
10830 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10832 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10833 " cannot be adopted" %
10834 (utils.CommaJoin(baddisks),
10835 constants.ADOPTABLE_BLOCKDEV_ROOT),
10836 errors.ECODE_INVAL)
10838 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10839 list(all_disks))[pnode.name]
10840 node_disks.Raise("Cannot get block device information from node %s" %
10842 node_disks = node_disks.payload
10843 delta = all_disks.difference(node_disks.keys())
10845 raise errors.OpPrereqError("Missing block device(s): %s" %
10846 utils.CommaJoin(delta),
10847 errors.ECODE_INVAL)
10848 for dsk in self.disks:
10849 dsk[constants.IDISK_SIZE] = \
10850 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10852 # Verify instance specs
10853 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10855 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10856 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10857 constants.ISPEC_DISK_COUNT: len(self.disks),
10858 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10859 for disk in self.disks],
10860 constants.ISPEC_NIC_COUNT: len(self.nics),
10861 constants.ISPEC_SPINDLE_USE: spindle_use,
10864 group_info = self.cfg.GetNodeGroup(pnode.group)
10865 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10866 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10867 if not self.op.ignore_ipolicy and res:
10868 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10869 (pnode.group, group_info.name, utils.CommaJoin(res)))
10870 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10872 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10874 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10875 # check OS parameters (remotely)
10876 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10878 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10880 #TODO: _CheckExtParams (remotely)
10881 # Check parameters for extstorage
10883 # memory check on primary node
10884 #TODO(dynmem): use MINMEM for checking
10886 _CheckNodeFreeMemory(self, self.pnode.name,
10887 "creating instance %s" % self.op.instance_name,
10888 self.be_full[constants.BE_MAXMEM],
10889 self.op.hypervisor)
10891 self.dry_run_result = list(nodenames)
10893 def Exec(self, feedback_fn):
10894 """Create and add the instance to the cluster.
10897 instance = self.op.instance_name
10898 pnode_name = self.pnode.name
10900 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10901 self.owned_locks(locking.LEVEL_NODE)), \
10902 "Node locks differ from node resource locks"
10903 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10905 ht_kind = self.op.hypervisor
10906 if ht_kind in constants.HTS_REQ_PORT:
10907 network_port = self.cfg.AllocatePort()
10909 network_port = None
10911 # This is ugly, but we have a chicken-and-egg problem here
10912 # We can only take the group disk parameters, as the instance
10913 # has no disks yet (we are generating them right here).
10914 node = self.cfg.GetNodeInfo(pnode_name)
10915 nodegroup = self.cfg.GetNodeGroup(node.group)
10916 disks = _GenerateDiskTemplate(self,
10917 self.op.disk_template,
10918 instance, pnode_name,
10921 self.instance_file_storage_dir,
10922 self.op.file_driver,
10925 self.cfg.GetGroupDiskParams(nodegroup))
10927 iobj = objects.Instance(name=instance, os=self.op.os_type,
10928 primary_node=pnode_name,
10929 nics=self.nics, disks=disks,
10930 disk_template=self.op.disk_template,
10931 admin_state=constants.ADMINST_DOWN,
10932 network_port=network_port,
10933 beparams=self.op.beparams,
10934 hvparams=self.op.hvparams,
10935 hypervisor=self.op.hypervisor,
10936 osparams=self.op.osparams,
10940 for tag in self.op.tags:
10943 if self.adopt_disks:
10944 if self.op.disk_template == constants.DT_PLAIN:
10945 # rename LVs to the newly-generated names; we need to construct
10946 # 'fake' LV disks with the old data, plus the new unique_id
10947 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10949 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10950 rename_to.append(t_dsk.logical_id)
10951 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10952 self.cfg.SetDiskID(t_dsk, pnode_name)
10953 result = self.rpc.call_blockdev_rename(pnode_name,
10954 zip(tmp_disks, rename_to))
10955 result.Raise("Failed to rename adopted LVs")
10957 feedback_fn("* creating instance disks...")
10959 _CreateDisks(self, iobj)
10960 except errors.OpExecError:
10961 self.LogWarning("Device creation failed, reverting...")
10963 _RemoveDisks(self, iobj)
10965 self.cfg.ReleaseDRBDMinors(instance)
10968 feedback_fn("adding instance %s to cluster config" % instance)
10970 self.cfg.AddInstance(iobj, self.proc.GetECId())
10972 # Declare that we don't want to remove the instance lock anymore, as we've
10973 # added the instance to the config
10974 del self.remove_locks[locking.LEVEL_INSTANCE]
10976 if self.op.mode == constants.INSTANCE_IMPORT:
10977 # Release unused nodes
10978 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10980 # Release all nodes
10981 _ReleaseLocks(self, locking.LEVEL_NODE)
10984 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10985 feedback_fn("* wiping instance disks...")
10987 _WipeDisks(self, iobj)
10988 except errors.OpExecError, err:
10989 logging.exception("Wiping disks failed")
10990 self.LogWarning("Wiping instance disks failed (%s)", err)
10994 # Something is already wrong with the disks, don't do anything else
10996 elif self.op.wait_for_sync:
10997 disk_abort = not _WaitForSync(self, iobj)
10998 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10999 # make sure the disks are not degraded (still sync-ing is ok)
11000 feedback_fn("* checking mirrors status")
11001 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11006 _RemoveDisks(self, iobj)
11007 self.cfg.RemoveInstance(iobj.name)
11008 # Make sure the instance lock gets removed
11009 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11010 raise errors.OpExecError("There are some degraded disks for"
11013 # Release all node resource locks
11014 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11016 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11017 # we need to set the disks ID to the primary node, since the
11018 # preceding code might or might not have done it, depending on
11019 # disk template and other options
11020 for disk in iobj.disks:
11021 self.cfg.SetDiskID(disk, pnode_name)
11022 if self.op.mode == constants.INSTANCE_CREATE:
11023 if not self.op.no_install:
11024 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11025 not self.op.wait_for_sync)
11027 feedback_fn("* pausing disk sync to install instance OS")
11028 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11031 for idx, success in enumerate(result.payload):
11033 logging.warn("pause-sync of instance %s for disk %d failed",
11036 feedback_fn("* running the instance OS create scripts...")
11037 # FIXME: pass debug option from opcode to backend
11039 os_add_result = self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11040 self.op.debug_level)
11042 feedback_fn("* resuming disk sync")
11043 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11046 for idx, success in enumerate(result.payload):
11048 logging.warn("resume-sync of instance %s for disk %d failed",
11051 os_add_result.Raise("Could not add os for instance %s"
11052 " on node %s" % (instance, pnode_name))
11055 if self.op.mode == constants.INSTANCE_IMPORT:
11056 feedback_fn("* running the instance OS import scripts...")
11060 for idx, image in enumerate(self.src_images):
11064 # FIXME: pass debug option from opcode to backend
11065 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11066 constants.IEIO_FILE, (image, ),
11067 constants.IEIO_SCRIPT,
11068 (iobj.disks[idx], idx),
11070 transfers.append(dt)
11073 masterd.instance.TransferInstanceData(self, feedback_fn,
11074 self.op.src_node, pnode_name,
11075 self.pnode.secondary_ip,
11077 if not compat.all(import_result):
11078 self.LogWarning("Some disks for instance %s on node %s were not"
11079 " imported successfully" % (instance, pnode_name))
11081 rename_from = self._old_instance_name
11083 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11084 feedback_fn("* preparing remote import...")
11085 # The source cluster will stop the instance before attempting to make
11086 # a connection. In some cases stopping an instance can take a long
11087 # time, hence the shutdown timeout is added to the connection timeout.
11089 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11090 self.op.source_shutdown_timeout)
11091 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
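# Illustrative arithmetic for connect_timeout (hypothetical values, not
# taken from constants.py): if RIE_CONNECT_TIMEOUT were 180 seconds and the
# opcode's source_shutdown_timeout were 120 seconds, connect_timeout would
# be 300 seconds, i.e. the remote import allows the source cluster time to
# both stop the instance and open the connection before giving up.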
11093 assert iobj.primary_node == self.pnode.name
11095 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11096 self.source_x509_ca,
11097 self._cds, timeouts)
11098 if not compat.all(disk_results):
11099 # TODO: Should the instance still be started, even if some disks
11100 # failed to import (valid for local imports, too)?
11101 self.LogWarning("Some disks for instance %s on node %s were not"
11102 " imported successfully" % (instance, pnode_name))
11104 rename_from = self.source_instance_name
11107 # also checked in the prereq part
11108 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11111 # Run rename script on newly imported instance
11112 assert iobj.name == instance
11113 feedback_fn("Running rename script for %s" % instance)
11114 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11116 self.op.debug_level)
11117 if result.fail_msg:
11118 self.LogWarning("Failed to run rename script for %s on node"
11119 " %s: %s" % (instance, pnode_name, result.fail_msg))
11121 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11124 iobj.admin_state = constants.ADMINST_UP
11125 self.cfg.Update(iobj, feedback_fn)
11126 logging.info("Starting instance %s on node %s", instance, pnode_name)
11127 feedback_fn("* starting instance...")
11128 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11130 result.Raise("Could not start instance")
11132 return list(iobj.all_nodes)
11135 class LUInstanceMultiAlloc(NoHooksLU):
11136 """Allocates multiple instances at the same time.
11141 def CheckArguments(self):
11142 """Check arguments.
11146 for inst in self.op.instances:
11147 if inst.iallocator is not None:
11148 raise errors.OpPrereqError("iallocator is not allowed to be set on"
11149 " instance objects", errors.ECODE_INVAL)
11150 nodes.append(bool(inst.pnode))
11151 if inst.disk_template in constants.DTS_INT_MIRROR:
11152 nodes.append(bool(inst.snode))
11154 has_nodes = compat.any(nodes)
11155 if compat.all(nodes) ^ has_nodes:
11156 raise errors.OpPrereqError("There are instance objects providing"
11157 " pnode/snode while others do not",
11158 errors.ECODE_INVAL)
11160 if self.op.iallocator is None:
11161 default_iallocator = self.cfg.GetDefaultIAllocator()
11162 if default_iallocator and has_nodes:
11163 self.op.iallocator = default_iallocator
11165 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11166 " given and no cluster-wide default"
11167 " iallocator found; please specify either"
11168 " an iallocator or nodes on the instances"
11169 " or set a cluster-wide default iallocator",
11170 errors.ECODE_INVAL)
11172 _CheckOpportunisticLocking(self.op)
11174 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11176 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11177 utils.CommaJoin(dups), errors.ECODE_INVAL)
11179 def ExpandNames(self):
11180 """Calculate the locks.
11183 self.share_locks = _ShareAll()
11184 self.needed_locks = {
11185 # iallocator will select nodes and even if no iallocator is used,
11186 # collisions with LUInstanceCreate should be avoided
11187 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11190 if self.op.iallocator:
11191 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11192 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11194 if self.op.opportunistic_locking:
11195 self.opportunistic_locks[locking.LEVEL_NODE] = True
11196 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11199 for inst in self.op.instances:
11200 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11201 nodeslist.append(inst.pnode)
11202 if inst.snode is not None:
11203 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11204 nodeslist.append(inst.snode)
11206 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11207 # Lock resources of instance's primary and secondary nodes (copy to
11208 # prevent accidental modification)
11209 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11211 def CheckPrereq(self):
11212 """Check prerequisite.
11215 cluster = self.cfg.GetClusterInfo()
11216 default_vg = self.cfg.GetVGName()
11217 ec_id = self.proc.GetECId()
11219 if self.op.opportunistic_locking:
11220 # Only consider nodes for which a lock is held
11221 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11223 node_whitelist = None
11225 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11226 _ComputeNics(op, cluster, None,
11228 _ComputeFullBeParams(op, cluster),
11230 for op in self.op.instances]
11232 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11233 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11235 ial.Run(self.op.iallocator)
11237 if not ial.success:
11238 raise errors.OpPrereqError("Can't compute nodes using"
11239 " iallocator '%s': %s" %
11240 (self.op.iallocator, ial.info),
11241 errors.ECODE_NORES)
11243 self.ia_result = ial.result
11245 if self.op.dry_run:
11246 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11247 constants.JOB_IDS_KEY: [],
11250 def _ConstructPartialResult(self):
11251 """Contructs the partial result.
11254 (allocatable, failed) = self.ia_result
11256 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11257 map(compat.fst, allocatable),
11258 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11261 def Exec(self, feedback_fn):
11262 """Executes the opcode.
11265 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11266 (allocatable, failed) = self.ia_result
11269 for (name, nodes) in allocatable:
11270 op = op2inst.pop(name)
11273 (op.pnode, op.snode) = nodes
11275 (op.pnode,) = nodes
11279 missing = set(op2inst.keys()) - set(failed)
11280 assert not missing, \
11281 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11283 return ResultWithJobs(jobs, **self._ConstructPartialResult())
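# Rough sketch of the value returned above (hypothetical instance names;
# the real dictionary keys come from constants.JOB_IDS_KEY and
# opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY/FAILED_KEY): the caller
# would see something along the lines of
#   {"jobs": [[job_id, ...], ...],
#    "allocatable": ["inst1", "inst2"],
#    "failed": ["inst3"]}
# where the allocatable list holds the instances the iallocator could place
# and the failed list the ones it could not.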
11286 def _CheckRADOSFreeSpace():
11287 """Compute disk size requirements inside the RADOS cluster.
11290 # For the RADOS cluster we assume there is always enough space.
11294 class LUInstanceConsole(NoHooksLU):
11295 """Connect to an instance's console.
11297 This is somewhat special in that it returns the command line that
11298 you need to run on the master node in order to connect to the console.
11304 def ExpandNames(self):
11305 self.share_locks = _ShareAll()
11306 self._ExpandAndLockInstance()
11308 def CheckPrereq(self):
11309 """Check prerequisites.
11311 This checks that the instance is in the cluster.
11314 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11315 assert self.instance is not None, \
11316 "Cannot retrieve locked instance %s" % self.op.instance_name
11317 _CheckNodeOnline(self, self.instance.primary_node)
11319 def Exec(self, feedback_fn):
11320 """Connect to the console of an instance
11323 instance = self.instance
11324 node = instance.primary_node
11326 node_insts = self.rpc.call_instance_list([node],
11327 [instance.hypervisor])[node]
11328 node_insts.Raise("Can't get node information from %s" % node)
11330 if instance.name not in node_insts.payload:
11331 if instance.admin_state == constants.ADMINST_UP:
11332 state = constants.INSTST_ERRORDOWN
11333 elif instance.admin_state == constants.ADMINST_DOWN:
11334 state = constants.INSTST_ADMINDOWN
11336 state = constants.INSTST_ADMINOFFLINE
11337 raise errors.OpExecError("Instance %s is not running (state %s)" %
11338 (instance.name, state))
11340 logging.debug("Connecting to console of %s on %s", instance.name, node)
11342 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11345 def _GetInstanceConsole(cluster, instance):
11346 """Returns console information for an instance.
11348 @type cluster: L{objects.Cluster}
11349 @type instance: L{objects.Instance}
11353 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11354 # beparams and hvparams are passed separately, to avoid editing the
11355 # instance and then saving the defaults in the instance itself.
11356 hvparams = cluster.FillHV(instance)
11357 beparams = cluster.FillBE(instance)
11358 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11360 assert console.instance == instance.name
11361 assert console.Validate()
11363 return console.ToDict()
11366 class LUInstanceReplaceDisks(LogicalUnit):
11367 """Replace the disks of an instance.
11370 HPATH = "mirrors-replace"
11371 HTYPE = constants.HTYPE_INSTANCE
11374 def CheckArguments(self):
11375 """Check arguments.
11378 remote_node = self.op.remote_node
11379 ialloc = self.op.iallocator
11380 if self.op.mode == constants.REPLACE_DISK_CHG:
11381 if remote_node is None and ialloc is None:
11382 raise errors.OpPrereqError("When changing the secondary either an"
11383 " iallocator script must be used or the"
11384 " new node given", errors.ECODE_INVAL)
11386 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11388 elif remote_node is not None or ialloc is not None:
11389 # Not replacing the secondary
11390 raise errors.OpPrereqError("The iallocator and new node options can"
11391 " only be used when changing the"
11392 " secondary node", errors.ECODE_INVAL)
11394 def ExpandNames(self):
11395 self._ExpandAndLockInstance()
11397 assert locking.LEVEL_NODE not in self.needed_locks
11398 assert locking.LEVEL_NODE_RES not in self.needed_locks
11399 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11401 assert self.op.iallocator is None or self.op.remote_node is None, \
11402 "Conflicting options"
11404 if self.op.remote_node is not None:
11405 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11407 # Warning: do not remove the locking of the new secondary here
11408 # unless DRBD8.AddChildren is changed to work in parallel;
11409 # currently it doesn't since parallel invocations of
11410 # FindUnusedMinor will conflict
11411 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11412 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11414 self.needed_locks[locking.LEVEL_NODE] = []
11415 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11417 if self.op.iallocator is not None:
11418 # iallocator will select a new node in the same group
11419 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11420 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11422 self.needed_locks[locking.LEVEL_NODE_RES] = []
11424 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11425 self.op.iallocator, self.op.remote_node,
11426 self.op.disks, self.op.early_release,
11427 self.op.ignore_ipolicy)
11429 self.tasklets = [self.replacer]
11431 def DeclareLocks(self, level):
11432 if level == locking.LEVEL_NODEGROUP:
11433 assert self.op.remote_node is None
11434 assert self.op.iallocator is not None
11435 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11437 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11438 # Lock all groups used by instance optimistically; this requires going
11439 # via the node before it's locked, requiring verification later on
11440 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11441 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11443 elif level == locking.LEVEL_NODE:
11444 if self.op.iallocator is not None:
11445 assert self.op.remote_node is None
11446 assert not self.needed_locks[locking.LEVEL_NODE]
11447 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11449 # Lock member nodes of all locked groups
11450 self.needed_locks[locking.LEVEL_NODE] = \
11452 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11453 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11455 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11457 self._LockInstancesNodes()
11459 elif level == locking.LEVEL_NODE_RES:
11461 self.needed_locks[locking.LEVEL_NODE_RES] = \
11462 self.needed_locks[locking.LEVEL_NODE]
11464 def BuildHooksEnv(self):
11465 """Build hooks env.
11467 This runs on the master, the primary and all the secondaries.
11470 instance = self.replacer.instance
11472 "MODE": self.op.mode,
11473 "NEW_SECONDARY": self.op.remote_node,
11474 "OLD_SECONDARY": instance.secondary_nodes[0],
11476 env.update(_BuildInstanceHookEnvByObject(self, instance))
11479 def BuildHooksNodes(self):
11480 """Build hooks nodes.
11483 instance = self.replacer.instance
11485 self.cfg.GetMasterNode(),
11486 instance.primary_node,
11488 if self.op.remote_node is not None:
11489 nl.append(self.op.remote_node)
11492 def CheckPrereq(self):
11493 """Check prerequisites.
11496 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11497 self.op.iallocator is None)
11499 # Verify if node group locks are still correct
11500 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11502 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11504 return LogicalUnit.CheckPrereq(self)
11507 class TLReplaceDisks(Tasklet):
11508 """Replaces disks for an instance.
11510 Note: Locking is not within the scope of this class.
11513 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11514 disks, early_release, ignore_ipolicy):
11515 """Initializes this class.
11518 Tasklet.__init__(self, lu)
11521 self.instance_name = instance_name
11523 self.iallocator_name = iallocator_name
11524 self.remote_node = remote_node
11526 self.early_release = early_release
11527 self.ignore_ipolicy = ignore_ipolicy
11530 self.instance = None
11531 self.new_node = None
11532 self.target_node = None
11533 self.other_node = None
11534 self.remote_node_info = None
11535 self.node_secondary_ip = None
11538 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11539 """Compute a new secondary node using an IAllocator.
11542 req = iallocator.IAReqRelocate(name=instance_name,
11543 relocate_from=list(relocate_from))
11544 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11546 ial.Run(iallocator_name)
11548 if not ial.success:
11549 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11550 " %s" % (iallocator_name, ial.info),
11551 errors.ECODE_NORES)
11553 remote_node_name = ial.result[0]
11555 lu.LogInfo("Selected new secondary for instance '%s': %s",
11556 instance_name, remote_node_name)
11558 return remote_node_name
11560 def _FindFaultyDisks(self, node_name):
11561 """Wrapper for L{_FindFaultyInstanceDisks}.
11564 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11567 def _CheckDisksActivated(self, instance):
11568 """Checks if the instance disks are activated.
11570 @param instance: The instance to check disks
11571 @return: True if they are activated, False otherwise
11574 nodes = instance.all_nodes
11576 for idx, dev in enumerate(instance.disks):
11578 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11579 self.cfg.SetDiskID(dev, node)
11581 result = _BlockdevFind(self, node, dev, instance)
11585 elif result.fail_msg or not result.payload:
11590 def CheckPrereq(self):
11591 """Check prerequisites.
11593 This checks that the instance is in the cluster.
11596 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11597 assert instance is not None, \
11598 "Cannot retrieve locked instance %s" % self.instance_name
11600 if instance.disk_template != constants.DT_DRBD8:
11601 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11602 " instances", errors.ECODE_INVAL)
11604 if len(instance.secondary_nodes) != 1:
11605 raise errors.OpPrereqError("The instance has a strange layout,"
11606 " expected one secondary but found %d" %
11607 len(instance.secondary_nodes),
11608 errors.ECODE_FAULT)
11610 instance = self.instance
11611 secondary_node = instance.secondary_nodes[0]
11613 if self.iallocator_name is None:
11614 remote_node = self.remote_node
11616 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11617 instance.name, instance.secondary_nodes)
11619 if remote_node is None:
11620 self.remote_node_info = None
11622 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11623 "Remote node '%s' is not locked" % remote_node
11625 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11626 assert self.remote_node_info is not None, \
11627 "Cannot retrieve locked node %s" % remote_node
11629 if remote_node == self.instance.primary_node:
11630 raise errors.OpPrereqError("The specified node is the primary node of"
11631 " the instance", errors.ECODE_INVAL)
11633 if remote_node == secondary_node:
11634 raise errors.OpPrereqError("The specified node is already the"
11635 " secondary node of the instance",
11636 errors.ECODE_INVAL)
11638 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11639 constants.REPLACE_DISK_CHG):
11640 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11641 errors.ECODE_INVAL)
11643 if self.mode == constants.REPLACE_DISK_AUTO:
11644 if not self._CheckDisksActivated(instance):
11645 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11646 " first" % self.instance_name,
11647 errors.ECODE_STATE)
11648 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11649 faulty_secondary = self._FindFaultyDisks(secondary_node)
11651 if faulty_primary and faulty_secondary:
11652 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11653 " one node and can not be repaired"
11654 " automatically" % self.instance_name,
11655 errors.ECODE_STATE)
11658 self.disks = faulty_primary
11659 self.target_node = instance.primary_node
11660 self.other_node = secondary_node
11661 check_nodes = [self.target_node, self.other_node]
11662 elif faulty_secondary:
11663 self.disks = faulty_secondary
11664 self.target_node = secondary_node
11665 self.other_node = instance.primary_node
11666 check_nodes = [self.target_node, self.other_node]
11672 # Non-automatic modes
11673 if self.mode == constants.REPLACE_DISK_PRI:
11674 self.target_node = instance.primary_node
11675 self.other_node = secondary_node
11676 check_nodes = [self.target_node, self.other_node]
11678 elif self.mode == constants.REPLACE_DISK_SEC:
11679 self.target_node = secondary_node
11680 self.other_node = instance.primary_node
11681 check_nodes = [self.target_node, self.other_node]
11683 elif self.mode == constants.REPLACE_DISK_CHG:
11684 self.new_node = remote_node
11685 self.other_node = instance.primary_node
11686 self.target_node = secondary_node
11687 check_nodes = [self.new_node, self.other_node]
11689 _CheckNodeNotDrained(self.lu, remote_node)
11690 _CheckNodeVmCapable(self.lu, remote_node)
11692 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11693 assert old_node_info is not None
11694 if old_node_info.offline and not self.early_release:
11695 # doesn't make sense to delay the release
11696 self.early_release = True
11697 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11698 " early-release mode", secondary_node)
11701 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11704 # If not specified all disks should be replaced
11706 self.disks = range(len(self.instance.disks))
11708 # TODO: This is ugly, but right now we can't distinguish between an
11709 # internally submitted opcode and an external one. We should fix that.
11710 if self.remote_node_info:
11711 # We change the node, let's verify it still meets instance policy
11712 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11713 cluster = self.cfg.GetClusterInfo()
11714 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11716 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11717 ignore=self.ignore_ipolicy)
11719 for node in check_nodes:
11720 _CheckNodeOnline(self.lu, node)
11722 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11725 if node_name is not None)
11727 # Release unneeded node and node resource locks
11728 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11729 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11730 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11732 # Release any owned node group
11733 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11735 # Check whether disks are valid
11736 for disk_idx in self.disks:
11737 instance.FindDisk(disk_idx)
11739 # Get secondary node IP addresses
11740 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11741 in self.cfg.GetMultiNodeInfo(touched_nodes))
11743 def Exec(self, feedback_fn):
11744 """Execute disk replacement.
11746 This dispatches the disk replacement to the appropriate handler.
11750 # Verify owned locks before starting operation
11751 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11752 assert set(owned_nodes) == set(self.node_secondary_ip), \
11753 ("Incorrect node locks, owning %s, expected %s" %
11754 (owned_nodes, self.node_secondary_ip.keys()))
11755 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11756 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11757 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11759 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11760 assert list(owned_instances) == [self.instance_name], \
11761 "Instance '%s' not locked" % self.instance_name
11763 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11764 "Should not own any node group lock at this point"
11767 feedback_fn("No disks need replacement for instance '%s'" %
11768 self.instance.name)
11771 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11772 (utils.CommaJoin(self.disks), self.instance.name))
11773 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11774 feedback_fn("Current secondary node: %s" %
11775 utils.CommaJoin(self.instance.secondary_nodes))
11777 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11779 # Activate the instance disks if we're replacing them on a down instance
11781 _StartInstanceDisks(self.lu, self.instance, True)
11784 # Should we replace the secondary node?
11785 if self.new_node is not None:
11786 fn = self._ExecDrbd8Secondary
11788 fn = self._ExecDrbd8DiskOnly
11790 result = fn(feedback_fn)
11792 # Deactivate the instance disks if we're replacing them on a
11795 _SafeShutdownInstanceDisks(self.lu, self.instance)
11797 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11800 # Verify owned locks
11801 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11802 nodes = frozenset(self.node_secondary_ip)
11803 assert ((self.early_release and not owned_nodes) or
11804 (not self.early_release and not (set(owned_nodes) - nodes))), \
11805 ("Not owning the correct locks, early_release=%s, owned=%r,"
11806 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11810 def _CheckVolumeGroup(self, nodes):
11811 self.lu.LogInfo("Checking volume groups")
11813 vgname = self.cfg.GetVGName()
11815 # Make sure volume group exists on all involved nodes
11816 results = self.rpc.call_vg_list(nodes)
11818 raise errors.OpExecError("Can't list volume groups on the nodes")
11821 res = results[node]
11822 res.Raise("Error checking node %s" % node)
11823 if vgname not in res.payload:
11824 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11827 def _CheckDisksExistence(self, nodes):
11828 # Check disk existence
11829 for idx, dev in enumerate(self.instance.disks):
11830 if idx not in self.disks:
11834 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11835 self.cfg.SetDiskID(dev, node)
11837 result = _BlockdevFind(self, node, dev, self.instance)
11839 msg = result.fail_msg
11840 if msg or not result.payload:
11842 msg = "disk not found"
11843 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11846 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11847 for idx, dev in enumerate(self.instance.disks):
11848 if idx not in self.disks:
11851 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11854 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11855 on_primary, ldisk=ldisk):
11856 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11857 " replace disks for instance %s" %
11858 (node_name, self.instance.name))
11860 def _CreateNewStorage(self, node_name):
11861 """Create new storage on the primary or secondary node.
11863 This is only used for same-node replaces, not for changing the
11864 secondary node, hence we don't want to modify the existing disk.
11869 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11870 for idx, dev in enumerate(disks):
11871 if idx not in self.disks:
11874 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11876 self.cfg.SetDiskID(dev, node_name)
11878 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11879 names = _GenerateUniqueNames(self.lu, lv_names)
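# Illustration (hypothetical unique prefix): for idx == 0, lv_names is
# [".disk0_data", ".disk0_meta"], and _GenerateUniqueNames is expected to
# turn those into cluster-unique LV names such as "<uuid>.disk0_data" and
# "<uuid>.disk0_meta", which become names[0] and names[1] below.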
11881 (data_disk, meta_disk) = dev.children
11882 vg_data = data_disk.logical_id[0]
11883 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11884 logical_id=(vg_data, names[0]),
11885 params=data_disk.params)
11886 vg_meta = meta_disk.logical_id[0]
11887 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11888 size=constants.DRBD_META_SIZE,
11889 logical_id=(vg_meta, names[1]),
11890 params=meta_disk.params)
11892 new_lvs = [lv_data, lv_meta]
11893 old_lvs = [child.Copy() for child in dev.children]
11894 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11895 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11897 # we pass force_create=True to force the LVM creation
11898 for new_lv in new_lvs:
11899 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11900 _GetInstanceInfoText(self.instance), False,
11905 def _CheckDevices(self, node_name, iv_names):
11906 for name, (dev, _, _) in iv_names.iteritems():
11907 self.cfg.SetDiskID(dev, node_name)
11909 result = _BlockdevFind(self, node_name, dev, self.instance)
11911 msg = result.fail_msg
11912 if msg or not result.payload:
11914 msg = "disk not found"
11915 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11918 if result.payload.is_degraded:
11919 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11921 def _RemoveOldStorage(self, node_name, iv_names):
11922 for name, (_, old_lvs, _) in iv_names.iteritems():
11923 self.lu.LogInfo("Remove logical volumes for %s", name)
11926 self.cfg.SetDiskID(lv, node_name)
11928 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11930 self.lu.LogWarning("Can't remove old LV: %s", msg,
11931 hint="remove unused LVs manually")
11933 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11934 """Replace a disk on the primary or secondary for DRBD 8.
11936 The algorithm for replace is quite complicated:
11938 1. for each disk to be replaced:
11940 1. create new LVs on the target node with unique names
11941 1. detach old LVs from the drbd device
11942 1. rename old LVs to name_replaced.<time_t>
11943 1. rename new LVs to old LVs
11944 1. attach the new LVs (with the old names now) to the drbd device
11946 1. wait for sync across all devices
11948 1. for each modified disk:
11950 1. remove old LVs (which have the name name_replaced.<time_t>)
11952 Failures are not very well handled.

"""
steps_total = 6
11957 # Step: check device activation
11958 self.lu.LogStep(1, steps_total, "Check device existence")
11959 self._CheckDisksExistence([self.other_node, self.target_node])
11960 self._CheckVolumeGroup([self.target_node, self.other_node])
11962 # Step: check other node consistency
11963 self.lu.LogStep(2, steps_total, "Check peer consistency")
11964 self._CheckDisksConsistency(self.other_node,
11965 self.other_node == self.instance.primary_node,
11968 # Step: create new storage
11969 self.lu.LogStep(3, steps_total, "Allocate new storage")
11970 iv_names = self._CreateNewStorage(self.target_node)
11972 # Step: for each lv, detach+rename*2+attach
11973 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11974 for dev, old_lvs, new_lvs in iv_names.itervalues():
11975 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11977 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11979 result.Raise("Can't detach drbd from local storage on node"
11980 " %s for device %s" % (self.target_node, dev.iv_name))
11982 #cfg.Update(instance)
11984 # ok, we created the new LVs, so now we know we have the needed
11985 # storage; as such, we proceed on the target node to rename
11986 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11987 # using the assumption that logical_id == physical_id (which in
11988 # turn is the unique_id on that node)
11990 # FIXME(iustin): use a better name for the replaced LVs
11991 temp_suffix = int(time.time())
11992 ren_fn = lambda d, suff: (d.physical_id[0],
11993 d.physical_id[1] + "_replaced-%s" % suff)
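# Hypothetical example of ren_fn: an old data LV with physical_id
# ("xenvg", "abc123.disk0_data") and temp_suffix 1360000000 would get the
# rename target ("xenvg", "abc123.disk0_data_replaced-1360000000"); the
# volume group name and timestamp are made up for illustration.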
11995 # Build the rename list based on what LVs exist on the node
11996 rename_old_to_new = []
11997 for to_ren in old_lvs:
11998 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11999 if not result.fail_msg and result.payload:
12001 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12003 self.lu.LogInfo("Renaming the old LVs on the target node")
12004 result = self.rpc.call_blockdev_rename(self.target_node,
12006 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12008 # Now we rename the new LVs to the old LVs
12009 self.lu.LogInfo("Renaming the new LVs on the target node")
12010 rename_new_to_old = [(new, old.physical_id)
12011 for old, new in zip(old_lvs, new_lvs)]
12012 result = self.rpc.call_blockdev_rename(self.target_node,
12014 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12016 # Intermediate steps of in memory modifications
12017 for old, new in zip(old_lvs, new_lvs):
12018 new.logical_id = old.logical_id
12019 self.cfg.SetDiskID(new, self.target_node)
12021 # We need to modify old_lvs so that removal later removes the
12022 # right LVs, not the newly added ones; note that old_lvs is a
12024 for disk in old_lvs:
12025 disk.logical_id = ren_fn(disk, temp_suffix)
12026 self.cfg.SetDiskID(disk, self.target_node)
12028 # Now that the new lvs have the old name, we can add them to the device
12029 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12030 result = self.rpc.call_blockdev_addchildren(self.target_node,
12031 (dev, self.instance), new_lvs)
12032 msg = result.fail_msg
12034 for new_lv in new_lvs:
12035 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12038 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12039 hint=("cleanup manually the unused logical"
12041 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12043 cstep = itertools.count(5)
12045 if self.early_release:
12046 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12047 self._RemoveOldStorage(self.target_node, iv_names)
12048 # TODO: Check if releasing locks early still makes sense
12049 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12051 # Release all resource locks except those used by the instance
12052 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12053 keep=self.node_secondary_ip.keys())
12055 # Release all node locks while waiting for sync
12056 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12058 # TODO: Can the instance lock be downgraded here? Take the optional disk
12059 # shutdown in the caller into consideration.
12062 # This can fail as the old devices are degraded and _WaitForSync
12063 # does a combined result over all disks, so we don't check its return value
12064 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12065 _WaitForSync(self.lu, self.instance)
12067 # Check all devices manually
12068 self._CheckDevices(self.instance.primary_node, iv_names)
12070 # Step: remove old storage
12071 if not self.early_release:
12072 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12073 self._RemoveOldStorage(self.target_node, iv_names)
12075 def _ExecDrbd8Secondary(self, feedback_fn):
12076 """Replace the secondary node for DRBD 8.
12078 The algorithm for replace is quite complicated:
12079 - for all disks of the instance:
12080 - create new LVs on the new node with same names
12081 - shutdown the drbd device on the old secondary
12082 - disconnect the drbd network on the primary
12083 - create the drbd device on the new secondary
12084 - network attach the drbd on the primary, using an artifice:
12085 the drbd code for Attach() will connect to the network if it
12086 finds a device which is connected to the good local disks but
12087 not network enabled
12088 - wait for sync across all devices
12089 - remove all disks from the old secondary
12091 Failures are not very well handled.

"""
steps_total = 6
12096 pnode = self.instance.primary_node
12098 # Step: check device activation
12099 self.lu.LogStep(1, steps_total, "Check device existence")
12100 self._CheckDisksExistence([self.instance.primary_node])
12101 self._CheckVolumeGroup([self.instance.primary_node])
12103 # Step: check other node consistency
12104 self.lu.LogStep(2, steps_total, "Check peer consistency")
12105 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12107 # Step: create new storage
12108 self.lu.LogStep(3, steps_total, "Allocate new storage")
12109 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12110 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12111 for idx, dev in enumerate(disks):
12112 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12113 (self.new_node, idx))
12114 # we pass force_create=True to force LVM creation
12115 for new_lv in dev.children:
12116 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12117 True, _GetInstanceInfoText(self.instance), False,
12120 # Step 4: drbd minors and drbd setup changes
12121 # after this, we must manually remove the drbd minors on both the
12122 # error and the success paths
12123 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12124 minors = self.cfg.AllocateDRBDMinor([self.new_node
12125 for dev in self.instance.disks],
12126 self.instance.name)
12127 logging.debug("Allocated minors %r", minors)
12130 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12131 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12132 (self.new_node, idx))
12133 # create new devices on new_node; note that we create two IDs:
12134 # one without port, so the drbd will be activated without
12135 # networking information on the new node at this stage, and one
12136 # with network, for the latter activation in step 4
12137 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12138 if self.instance.primary_node == o_node1:
12141 assert self.instance.primary_node == o_node2, "Three-node instance?"
12144 new_alone_id = (self.instance.primary_node, self.new_node, None,
12145 p_minor, new_minor, o_secret)
12146 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12147 p_minor, new_minor, o_secret)
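# Sketch with made-up values: if the old logical_id were
# ("node1.example.com", "node2.example.com", 11050, 0, 3, "secret") and the
# newly allocated minor were 7, then with node1 as the primary p_minor
# would be 0, new_alone_id would be
# ("node1.example.com", self.new_node, None, 0, 7, "secret") and new_net_id
# the same tuple with 11050 in place of None.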
12149 iv_names[idx] = (dev, dev.children, new_net_id)
12150 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12152 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12153 logical_id=new_alone_id,
12154 children=dev.children,
12157 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12160 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12162 _GetInstanceInfoText(self.instance), False,
12164 except errors.GenericError:
12165 self.cfg.ReleaseDRBDMinors(self.instance.name)
12168 # We have new devices, shutdown the drbd on the old secondary
12169 for idx, dev in enumerate(self.instance.disks):
12170 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12171 self.cfg.SetDiskID(dev, self.target_node)
12172 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12173 (dev, self.instance)).fail_msg
12175 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12176 "node: %s" % (idx, msg),
12177 hint=("Please cleanup this device manually as"
12178 " soon as possible"))
12180 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12181 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12182 self.instance.disks)[pnode]
12184 msg = result.fail_msg
12186 # detaches didn't succeed (unlikely)
12187 self.cfg.ReleaseDRBDMinors(self.instance.name)
12188 raise errors.OpExecError("Can't detach the disks from the network on"
12189 " old node: %s" % (msg,))
12191 # if we managed to detach at least one, we update all the disks of
12192 # the instance to point to the new secondary
12193 self.lu.LogInfo("Updating instance configuration")
12194 for dev, _, new_logical_id in iv_names.itervalues():
12195 dev.logical_id = new_logical_id
12196 self.cfg.SetDiskID(dev, self.instance.primary_node)
12198 self.cfg.Update(self.instance, feedback_fn)
12200 # Release all node locks (the configuration has been updated)
12201 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12203 # and now perform the drbd attach
12204 self.lu.LogInfo("Attaching primary drbds to new secondary"
12205 " (standalone => connected)")
12206 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12208 self.node_secondary_ip,
12209 (self.instance.disks, self.instance),
12210 self.instance.name,
12212 for to_node, to_result in result.items():
12213 msg = to_result.fail_msg
12215 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12217 hint=("please do a gnt-instance info to see the"
12218 " status of disks"))
12220 cstep = itertools.count(5)
12222 if self.early_release:
12223 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12224 self._RemoveOldStorage(self.target_node, iv_names)
12225 # TODO: Check if releasing locks early still makes sense
12226 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12228 # Release all resource locks except those used by the instance
12229 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12230 keep=self.node_secondary_ip.keys())
12232 # TODO: Can the instance lock be downgraded here? Take the optional disk
12233 # shutdown in the caller into consideration.
12236 # This can fail as the old devices are degraded and _WaitForSync
12237 # does a combined result over all disks, so we don't check its return value
12238 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12239 _WaitForSync(self.lu, self.instance)
12241 # Check all devices manually
12242 self._CheckDevices(self.instance.primary_node, iv_names)
12244 # Step: remove old storage
12245 if not self.early_release:
12246 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12247 self._RemoveOldStorage(self.target_node, iv_names)
12250 class LURepairNodeStorage(NoHooksLU):
12251 """Repairs the volume group on a node.
12256 def CheckArguments(self):
12257 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12259 storage_type = self.op.storage_type
12261 if (constants.SO_FIX_CONSISTENCY not in
12262 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12263 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12264 " repaired" % storage_type,
12265 errors.ECODE_INVAL)
12267 def ExpandNames(self):
12268 self.needed_locks = {
12269 locking.LEVEL_NODE: [self.op.node_name],
12272 def _CheckFaultyDisks(self, instance, node_name):
12273 """Ensure faulty disks abort the opcode or at least warn."""
12275 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12277 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12278 " node '%s'" % (instance.name, node_name),
12279 errors.ECODE_STATE)
12280 except errors.OpPrereqError, err:
12281 if self.op.ignore_consistency:
12282 self.LogWarning(str(err.args[0]))
12286 def CheckPrereq(self):
12287 """Check prerequisites.
12290 # Check whether any instance on this node has faulty disks
12291 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12292 if inst.admin_state != constants.ADMINST_UP:
12294 check_nodes = set(inst.all_nodes)
12295 check_nodes.discard(self.op.node_name)
12296 for inst_node_name in check_nodes:
12297 self._CheckFaultyDisks(inst, inst_node_name)
12299 def Exec(self, feedback_fn):
12300 feedback_fn("Repairing storage unit '%s' on %s ..." %
12301 (self.op.name, self.op.node_name))
12303 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12304 result = self.rpc.call_storage_execute(self.op.node_name,
12305 self.op.storage_type, st_args,
12307 constants.SO_FIX_CONSISTENCY)
12308 result.Raise("Failed to repair storage unit '%s' on %s" %
12309 (self.op.name, self.op.node_name))
12312 class LUNodeEvacuate(NoHooksLU):
12313 """Evacuates instances off a list of nodes.
12318 _MODE2IALLOCATOR = {
12319 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12320 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12321 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12323 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12324 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12325 constants.IALLOCATOR_NEVAC_MODES)
12327 def CheckArguments(self):
12328 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12330 def ExpandNames(self):
12331 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12333 if self.op.remote_node is not None:
12334 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12335 assert self.op.remote_node
12337 if self.op.remote_node == self.op.node_name:
12338 raise errors.OpPrereqError("Can not use evacuated node as a new"
12339 " secondary node", errors.ECODE_INVAL)
12341 if self.op.mode != constants.NODE_EVAC_SEC:
12342 raise errors.OpPrereqError("Without the use of an iallocator only"
12343 " secondary instances can be evacuated",
12344 errors.ECODE_INVAL)
12347 self.share_locks = _ShareAll()
12348 self.needed_locks = {
12349 locking.LEVEL_INSTANCE: [],
12350 locking.LEVEL_NODEGROUP: [],
12351 locking.LEVEL_NODE: [],
12354 # Determine nodes (via group) optimistically, needs verification once locks
12355 # have been acquired
12356 self.lock_nodes = self._DetermineNodes()
12358 def _DetermineNodes(self):
12359 """Gets the list of nodes to operate on.
12362 if self.op.remote_node is None:
12363 # Iallocator will choose any node(s) in the same group
12364 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12366 group_nodes = frozenset([self.op.remote_node])
12368 # Determine nodes to be locked
12369 return set([self.op.node_name]) | group_nodes
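# Sketch with hypothetical node names: evacuating "node1" without a remote
# node would lock node1 plus every member of node1's group(s), e.g.
# set(["node1", "node2", "node3"]); with remote_node="node9" only
# set(["node1", "node9"]) would be locked.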
12371 def _DetermineInstances(self):
12372 """Builds list of instances to operate on.
12375 assert self.op.mode in constants.NODE_EVAC_MODES
12377 if self.op.mode == constants.NODE_EVAC_PRI:
12378 # Primary instances only
12379 inst_fn = _GetNodePrimaryInstances
12380 assert self.op.remote_node is None, \
12381 "Evacuating primary instances requires iallocator"
12382 elif self.op.mode == constants.NODE_EVAC_SEC:
12383 # Secondary instances only
12384 inst_fn = _GetNodeSecondaryInstances
12387 assert self.op.mode == constants.NODE_EVAC_ALL
12388 inst_fn = _GetNodeInstances
12389 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12391 raise errors.OpPrereqError("Due to an issue with the iallocator"
12392 " interface it is not possible to evacuate"
12393 " all instances at once; specify explicitly"
12394 " whether to evacuate primary or secondary"
12396 errors.ECODE_INVAL)
12398 return inst_fn(self.cfg, self.op.node_name)
12400 def DeclareLocks(self, level):
12401 if level == locking.LEVEL_INSTANCE:
12402 # Lock instances optimistically, needs verification once node and group
12403 # locks have been acquired
12404 self.needed_locks[locking.LEVEL_INSTANCE] = \
12405 set(i.name for i in self._DetermineInstances())
12407 elif level == locking.LEVEL_NODEGROUP:
12408 # Lock node groups for all potential target nodes optimistically, needs
12409 # verification once nodes have been acquired
12410 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12411 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12413 elif level == locking.LEVEL_NODE:
12414 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12416 def CheckPrereq(self):
12418 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12419 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12420 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12422 need_nodes = self._DetermineNodes()
12424 if not owned_nodes.issuperset(need_nodes):
12425 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12426 " locks were acquired, current nodes are"
12427 " are '%s', used to be '%s'; retry the"
12429 (self.op.node_name,
12430 utils.CommaJoin(need_nodes),
12431 utils.CommaJoin(owned_nodes)),
12432 errors.ECODE_STATE)
12434 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12435 if owned_groups != wanted_groups:
12436 raise errors.OpExecError("Node groups changed since locks were acquired,"
12437 " current groups are '%s', used to be '%s';"
12438 " retry the operation" %
12439 (utils.CommaJoin(wanted_groups),
12440 utils.CommaJoin(owned_groups)))
12442 # Determine affected instances
12443 self.instances = self._DetermineInstances()
12444 self.instance_names = [i.name for i in self.instances]
12446 if set(self.instance_names) != owned_instances:
12447 raise errors.OpExecError("Instances on node '%s' changed since locks"
12448 " were acquired, current instances are '%s',"
12449 " used to be '%s'; retry the operation" %
12450 (self.op.node_name,
12451 utils.CommaJoin(self.instance_names),
12452 utils.CommaJoin(owned_instances)))
12454 if self.instance_names:
12455 self.LogInfo("Evacuating instances from node '%s': %s",
12457 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12459 self.LogInfo("No instances to evacuate from node '%s'",
12462 if self.op.remote_node is not None:
12463 for i in self.instances:
12464 if i.primary_node == self.op.remote_node:
12465 raise errors.OpPrereqError("Node %s is the primary node of"
12466 " instance %s, cannot use it as"
12468 (self.op.remote_node, i.name),
12469 errors.ECODE_INVAL)
12471 def Exec(self, feedback_fn):
12472 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12474 if not self.instance_names:
12475 # No instances to evacuate
12478 elif self.op.iallocator is not None:
12479 # TODO: Implement relocation to other group
12480 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12481 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12482 instances=list(self.instance_names))
12483 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12485 ial.Run(self.op.iallocator)
12487 if not ial.success:
12488 raise errors.OpPrereqError("Can't compute node evacuation using"
12489 " iallocator '%s': %s" %
12490 (self.op.iallocator, ial.info),
12491 errors.ECODE_NORES)
12493 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12495 elif self.op.remote_node is not None:
12496 assert self.op.mode == constants.NODE_EVAC_SEC
12498 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12499 remote_node=self.op.remote_node,
12501 mode=constants.REPLACE_DISK_CHG,
12502 early_release=self.op.early_release)]
12503 for instance_name in self.instance_names]
12506 raise errors.ProgrammerError("No iallocator or remote node")
12508 return ResultWithJobs(jobs)
12511 def _SetOpEarlyRelease(early_release, op):
12512 """Sets C{early_release} flag on opcodes if available.
12516 op.early_release = early_release
12517 except AttributeError:
12518 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12523 def _NodeEvacDest(use_nodes, group, nodes):
12524 """Returns group or nodes depending on caller's choice.
12528 return utils.CommaJoin(nodes)
12533 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12534 """Unpacks the result of change-group and node-evacuate iallocator requests.
12536 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12537 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12539 @type lu: L{LogicalUnit}
12540 @param lu: Logical unit instance
12541 @type alloc_result: tuple/list
12542 @param alloc_result: Result from iallocator
12543 @type early_release: bool
12544 @param early_release: Whether to release locks early if possible
12545 @type use_nodes: bool
12546 @param use_nodes: Whether to display node names instead of groups
12549 (moved, failed, jobs) = alloc_result
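# Shape of alloc_result, sketched with hypothetical entries as can be read
# off from how the three parts are used below:
#   moved  = [("inst1", "group1", ["nodeA", "nodeB"]), ...]
#   failed = [("inst2", "not enough memory"), ...]
#   jobs   = [[<serialized opcode dict>, ...], ...]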
12552 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12553 for (name, reason) in failed)
12554 lu.LogWarning("Unable to evacuate instances %s", failreason)
12555 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12558 lu.LogInfo("Instances to be moved: %s",
12559 utils.CommaJoin("%s (to %s)" %
12560 (name, _NodeEvacDest(use_nodes, group, nodes))
12561 for (name, group, nodes) in moved))
12563 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12564 map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
12568 def _DiskSizeInBytesToMebibytes(lu, size):
12569 """Converts a disk size in bytes to mebibytes.
12571 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12574 (mib, remainder) = divmod(size, 1024 * 1024)
if remainder != 0:
12577 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12578 " to not overwrite existing data (%s bytes will not be"
12579 " wiped)", (1024 * 1024) - remainder)
mib += 1

return mib
12585 class LUInstanceGrowDisk(LogicalUnit):
12586 """Grow a disk of an instance.
12589 HPATH = "disk-grow"
12590 HTYPE = constants.HTYPE_INSTANCE
12593 def ExpandNames(self):
12594 self._ExpandAndLockInstance()
12595 self.needed_locks[locking.LEVEL_NODE] = []
12596 self.needed_locks[locking.LEVEL_NODE_RES] = []
12597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12598 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12600 def DeclareLocks(self, level):
12601 if level == locking.LEVEL_NODE:
12602 self._LockInstancesNodes()
12603 elif level == locking.LEVEL_NODE_RES:
12605 self.needed_locks[locking.LEVEL_NODE_RES] = \
12606 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12608 def BuildHooksEnv(self):
12609 """Build hooks env.
12611 This runs on the master, the primary and all the secondaries.
12615 "DISK": self.op.disk,
12616 "AMOUNT": self.op.amount,
12617 "ABSOLUTE": self.op.absolute,
12619 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12622 def BuildHooksNodes(self):
12623 """Build hooks nodes.
12626 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12629 def CheckPrereq(self):
12630 """Check prerequisites.
12632 This checks that the instance is in the cluster.
12635 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12636 assert instance is not None, \
12637 "Cannot retrieve locked instance %s" % self.op.instance_name
12638 nodenames = list(instance.all_nodes)
12639 for node in nodenames:
12640 _CheckNodeOnline(self, node)
12642 self.instance = instance
12644 if instance.disk_template not in constants.DTS_GROWABLE:
12645 raise errors.OpPrereqError("Instance's disk layout does not support"
12646 " growing", errors.ECODE_INVAL)
12648 self.disk = instance.FindDisk(self.op.disk)
12650 if self.op.absolute:
12651 self.target = self.op.amount
12652 self.delta = self.target - self.disk.size
12654 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12655 "current disk size (%s)" %
12656 (utils.FormatUnit(self.target, "h"),
12657 utils.FormatUnit(self.disk.size, "h")),
12658 errors.ECODE_STATE)
12660 self.delta = self.op.amount
12661 self.target = self.disk.size + self.delta
12663 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12664 utils.FormatUnit(self.delta, "h"),
12665 errors.ECODE_INVAL)
12667 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
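# Illustration of the two growth modes above (hypothetical sizes, in MiB):
# growing a 10240 MiB disk with amount=2048 and absolute=False gives
# delta=2048 and target=12288; with absolute=True and amount=20480 it gives
# target=20480 and delta=10240. A negative delta is rejected in both cases.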
12669 def _CheckDiskSpace(self, nodenames, req_vgspace):
12670 template = self.instance.disk_template
12671 if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
12672 # TODO: check the free disk space for file, when that feature will be
12674 nodes = map(self.cfg.GetNodeInfo, nodenames)
12675 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12678 # With exclusive storage we need to do something smarter than just looking
12679 # at free space; for now, let's simply abort the operation.
12680 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12681 " is enabled", errors.ECODE_STATE)
12682 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12684 def Exec(self, feedback_fn):
12685 """Execute disk grow.
12688 instance = self.instance
12691 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12692 assert (self.owned_locks(locking.LEVEL_NODE) ==
12693 self.owned_locks(locking.LEVEL_NODE_RES))
12695 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12697 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12699 raise errors.OpExecError("Cannot activate block device to grow")
12701 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12702 (self.op.disk, instance.name,
12703 utils.FormatUnit(self.delta, "h"),
12704 utils.FormatUnit(self.target, "h")))
12706 # First run all grow ops in dry-run mode
12707 for node in instance.all_nodes:
12708 self.cfg.SetDiskID(disk, node)
12709 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12711 result.Raise("Dry-run grow request failed to node %s" % node)
12713     if wipe_disks:
12714       # Get disk size from primary node for wiping
12715 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12716 result.Raise("Failed to retrieve disk size from node '%s'" %
12717 instance.primary_node)
12719 (disk_size_in_bytes, ) = result.payload
12721 if disk_size_in_bytes is None:
12722 raise errors.OpExecError("Failed to retrieve disk size from primary"
12723 " node '%s'" % instance.primary_node)
12725 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12727 assert old_disk_size >= disk.size, \
12728 ("Retrieved disk size too small (got %s, should be at least %s)" %
12729 (old_disk_size, disk.size))
12730     else:
12731       old_disk_size = None
12733 # We know that (as far as we can test) operations across different
12734 # nodes will succeed, time to run it for real on the backing storage
12735 for node in instance.all_nodes:
12736 self.cfg.SetDiskID(disk, node)
12737 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12739 result.Raise("Grow request failed to node %s" % node)
12741 # And now execute it for logical storage, on the primary node
12742 node = instance.primary_node
12743 self.cfg.SetDiskID(disk, node)
12744 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12746 result.Raise("Grow request failed to node %s" % node)
12748 disk.RecordGrow(self.delta)
12749 self.cfg.Update(instance, feedback_fn)
12751 # Changes have been recorded, release node lock
12752 _ReleaseLocks(self, locking.LEVEL_NODE)
12754 # Downgrade lock while waiting for sync
12755 self.glm.downgrade(locking.LEVEL_INSTANCE)
12757 assert wipe_disks ^ (old_disk_size is None)
12759     if wipe_disks:
12760       assert instance.disks[self.op.disk] == disk
12762 # Wipe newly added disk space
12763 _WipeDisks(self, instance,
12764 disks=[(self.op.disk, disk, old_disk_size)])
12766 if self.op.wait_for_sync:
12767 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12768       if disk_abort:
12769         self.LogWarning("Disk syncing has not returned a good status; check"
12770                         " the instance")
12771         if instance.admin_state != constants.ADMINST_UP:
12772           _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12773 elif instance.admin_state != constants.ADMINST_UP:
12774 self.LogWarning("Not shutting down the disk even if the instance is"
12775 " not supposed to be running because no wait for"
12776 " sync mode was requested")
12778 assert self.owned_locks(locking.LEVEL_NODE_RES)
12779 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12782 class LUInstanceQueryData(NoHooksLU):
12783 """Query runtime instance data.
12788 def ExpandNames(self):
12789 self.needed_locks = {}
12791 # Use locking if requested or when non-static information is wanted
12792 if not (self.op.static or self.op.use_locking):
12793 self.LogWarning("Non-static data requested, locks need to be acquired")
12794 self.op.use_locking = True
12796 if self.op.instances or not self.op.use_locking:
12797 # Expand instance names right here
12798 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12799     else:
12800       # Will use acquired locks
12801 self.wanted_names = None
12803 if self.op.use_locking:
12804 self.share_locks = _ShareAll()
12806 if self.wanted_names is None:
12807 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12808       else:
12809         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12811 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12812 self.needed_locks[locking.LEVEL_NODE] = []
12813 self.needed_locks[locking.LEVEL_NETWORK] = []
12814 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12816 def DeclareLocks(self, level):
12817 if self.op.use_locking:
12818 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12819 if level == locking.LEVEL_NODEGROUP:
12821 # Lock all groups used by instances optimistically; this requires going
12822 # via the node before it's locked, requiring verification later on
12823 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12824 frozenset(group_uuid
12825                     for instance_name in owned_instances
12826                     for group_uuid in
12827                       self.cfg.GetInstanceNodeGroups(instance_name))
12829 elif level == locking.LEVEL_NODE:
12830 self._LockInstancesNodes()
12832 elif level == locking.LEVEL_NETWORK:
12833         self.needed_locks[locking.LEVEL_NETWORK] = \
12834           frozenset(net_uuid
12835                     for instance_name in owned_instances
12836                     for net_uuid in
12837                       self.cfg.GetInstanceNetworks(instance_name))
12839 def CheckPrereq(self):
12840 """Check prerequisites.
12842 This only checks the optional instance list against the existing names.
12845 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12846 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12847 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12848 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12850 if self.wanted_names is None:
12851 assert self.op.use_locking, "Locking was not used"
12852 self.wanted_names = owned_instances
12854 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12856 if self.op.use_locking:
12857       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12858                                 None)
12859     else:
12860       assert not (owned_instances or owned_groups or
12861 owned_nodes or owned_networks)
12863 self.wanted_instances = instances.values()
12865 def _ComputeBlockdevStatus(self, node, instance, dev):
12866     """Returns the status of a block device
12868     """
12869     if self.op.static or not node:
12870       return None
12872     self.cfg.SetDiskID(dev, node)
12874     result = self.rpc.call_blockdev_find(node, dev)
12875     if result.offline:
12876       return None
12878     result.Raise("Can't compute disk status for %s" % instance.name)
12880     status = result.payload
12881     if status is None:
12882       return None
12884     return (status.dev_path, status.major, status.minor,
12885 status.sync_percent, status.estimated_time,
12886 status.is_degraded, status.ldisk_status)
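  # Editor's note (illustrative): the tuple built above mirrors the payload of
  # call_blockdev_find. A healthy, fully synced device might look roughly like
  #   ("/dev/drbd0", 147, 0, 100.0, 0, False, constants.LDS_OKAY)
  # i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
  # ldisk_status); the concrete values here are hypothetical.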
12888 def _ComputeDiskStatus(self, instance, snode, dev):
12889 """Compute block device status.
12892 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12894 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12896 def _ComputeDiskStatusInner(self, instance, snode, dev):
12897 """Compute block device status.
12899 @attention: The device has to be annotated already.
12902 if dev.dev_type in constants.LDS_DRBD:
12903 # we change the snode then (otherwise we use the one passed in)
12904 if dev.logical_id[0] == instance.primary_node:
12905 snode = dev.logical_id[1]
12906       else:
12907         snode = dev.logical_id[0]
12909     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12910                                               instance, dev)
12911 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12914     dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12915                                       instance, snode),
12916                        dev.children)
12918     return {
12921 "iv_name": dev.iv_name,
12922 "dev_type": dev.dev_type,
12923 "logical_id": dev.logical_id,
12924 "physical_id": dev.physical_id,
12925 "pstatus": dev_pstatus,
12926 "sstatus": dev_sstatus,
12927       "children": dev_children,
12928       "mode": dev.mode,
12929       "size": dev.size,
12930       }
12932 def Exec(self, feedback_fn):
12933     """Gather and return data"""
12934     result = {}
12936 cluster = self.cfg.GetClusterInfo()
12938 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12939 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12941 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12942 for node in nodes.values()))
12944 group2name_fn = lambda uuid: groups[uuid].name
12945 for instance in self.wanted_instances:
12946 pnode = nodes[instance.primary_node]
12948 if self.op.static or pnode.offline:
12949         remote_state = None
12950         if pnode.offline:
12951           self.LogWarning("Primary node %s is marked offline, returning static"
12952                           " information only for instance %s" %
12953                           (pnode.name, instance.name))
12954       else:
12955         remote_info = self.rpc.call_instance_info(instance.primary_node,
12956                                                   instance.name,
12957                                                   instance.hypervisor)
12958 remote_info.Raise("Error checking node %s" % instance.primary_node)
12959 remote_info = remote_info.payload
12960 if remote_info and "state" in remote_info:
12961 remote_state = "up"
12962         else:
12963           if instance.admin_state == constants.ADMINST_UP:
12964             remote_state = "down"
12965           else:
12966             remote_state = instance.admin_state
12968       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12969                   instance.disks)
12971 snodes_group_uuids = [nodes[snode_name].group
12972 for snode_name in instance.secondary_nodes]
12974 result[instance.name] = {
12975 "name": instance.name,
12976 "config_state": instance.admin_state,
12977 "run_state": remote_state,
12978 "pnode": instance.primary_node,
12979 "pnode_group_uuid": pnode.group,
12980 "pnode_group_name": group2name_fn(pnode.group),
12981 "snodes": instance.secondary_nodes,
12982 "snodes_group_uuids": snodes_group_uuids,
12983 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12985 # this happens to be the same format used for hooks
12986 "nics": _NICListToTuple(self, instance.nics),
12987         "disk_template": instance.disk_template,
12988         "disks": disks,
12989 "hypervisor": instance.hypervisor,
12990 "network_port": instance.network_port,
12991 "hv_instance": instance.hvparams,
12992 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12993 "be_instance": instance.beparams,
12994 "be_actual": cluster.FillBE(instance),
12995 "os_instance": instance.osparams,
12996 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12997 "serial_no": instance.serial_no,
12998 "mtime": instance.mtime,
12999 "ctime": instance.ctime,
13000         "uuid": instance.uuid,
13001         }
13003     return result
13006 def PrepareContainerMods(mods, private_fn):
13007 """Prepares a list of container modifications by adding a private data field.
13009 @type mods: list of tuples; (operation, index, parameters)
13010 @param mods: List of modifications
13011 @type private_fn: callable or None
13012   @param private_fn: Callable for constructing a private data field for a
13013     modification
13015   """
13017   if private_fn is None:
13018     fn = lambda: None
13019   else:
13020     fn = private_fn
13022   return [(op, idx, params, fn()) for (op, idx, params) in mods]
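# Editor's note (illustrative sketch, not part of the original code): "mods"
# is the normalized (operation, index, parameters) list, e.g.
#   [(constants.DDM_ADD, -1, {"size": 1024}),
#    (constants.DDM_MODIFY, 0, {"mode": "ro"})]
# and the return value appends one private object per modification:
#   [(constants.DDM_ADD, -1, {"size": 1024}, _InstNicModPrivate()), ...]
# (assuming private_fn=_InstNicModPrivate, as used for NIC changes below).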
13025 #: Type description for changes as returned by L{ApplyContainerMods}'s
13027 _TApplyContModsCbChanges = \
13028 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13029     ht.TNonEmptyString,
13030     ht.TAny,
13031     ])))
13034 def ApplyContainerMods(kind, container, chgdesc, mods,
13035 create_fn, modify_fn, remove_fn):
13036 """Applies descriptions in C{mods} to C{container}.
13039 @param kind: One-word item description
13040 @type container: list
13041 @param container: Container to modify
13042 @type chgdesc: None or list
13043 @param chgdesc: List of applied changes
13045 @param mods: Modifications as returned by L{PrepareContainerMods}
13046 @type create_fn: callable
13047 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13048 receives absolute item index, parameters and private data object as added
13049 by L{PrepareContainerMods}, returns tuple containing new item and changes
13051 @type modify_fn: callable
13052 @param modify_fn: Callback for modifying an existing item
13053 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13054 and private data object as added by L{PrepareContainerMods}, returns
13056 @type remove_fn: callable
13057 @param remove_fn: Callback on removing item; receives absolute item index,
13058     item and private data object as added by L{PrepareContainerMods}
13060   """
13061   for (op, idx, params, private) in mods:
13062     if idx == -1:
13063       # Append
13064       absidx = len(container) - 1
13065     elif idx < 0:
13066       raise IndexError("Not accepting negative indices other than -1")
13067     elif idx > len(container):
13068       raise IndexError("Got %s index %s, but there are only %s" %
13069                        (kind, idx, len(container)))
13070     else:
13071       absidx = idx
13073     changes = None
13075     if op == constants.DDM_ADD:
13076       # Calculate where item will be added
13077       if idx == -1:
13078         addidx = len(container)
13079       else:
13080         addidx = idx
13082       if create_fn is None:
13083         item = params
13084       else:
13085         (item, changes) = create_fn(addidx, params, private)
13087       if idx == -1:
13088         container.append(item)
13089       else:
13091         assert idx <= len(container)
13092         # list.insert does so before the specified index
13093         container.insert(idx, item)
13094     else:
13095       # Retrieve existing item
13096       try:
13097         item = container[absidx]
13098       except IndexError:
13099         raise IndexError("Invalid %s index %s" % (kind, idx))
13101       if op == constants.DDM_REMOVE:
13102         assert not params
13104         if remove_fn is not None:
13105           remove_fn(absidx, item, private)
13107         changes = [("%s/%s" % (kind, absidx), "remove")]
13109         assert container[absidx] == item
13110         del container[absidx]
13111       elif op == constants.DDM_MODIFY:
13112         if modify_fn is not None:
13113           changes = modify_fn(absidx, item, params, private)
13114       else:
13115         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13117     assert _TApplyContModsCbChanges(changes)
13119     if not (chgdesc is None or changes is None):
13120       chgdesc.extend(changes)
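# Editor's note (illustrative usage sketch): LUInstanceSetParams drives this
# helper for both validation and the real changes, for example
#   chgdesc = []
#   ApplyContainerMods("disk", instance.disks, chgdesc, self.diskmod,
#                      self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
# after which "chgdesc" holds (name, value) pairs such as
#   ("disk/1", "add:size=1024,mode=rw")
# that end up in the opcode result.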
13123 def _UpdateIvNames(base_index, disks):
13124 """Updates the C{iv_name} attribute of disks.
13126 @type disks: list of L{objects.Disk}
13129 for (idx, disk) in enumerate(disks):
13130 disk.iv_name = "disk/%s" % (base_index + idx, )
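# Editor's note (illustrative): LUInstanceSetParams.Exec below calls
# _UpdateIvNames(0, instance.disks) after applying disk changes, so that e.g.
# removing the middle disk of a three-disk instance leaves the remaining
# volumes named "disk/0" and "disk/1" again, with no gap in the numbering.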
13133 class _InstNicModPrivate:
13134 """Data structure for network interface modifications.
13136   Used by L{LUInstanceSetParams}.
13138   """
13139   def __init__(self):
13140     self.params = None
13141     self.filled = None
13144 class LUInstanceSetParams(LogicalUnit):
13145 """Modifies an instances's parameters.
13148 HPATH = "instance-modify"
13149 HTYPE = constants.HTYPE_INSTANCE
13152   @staticmethod
13153   def _UpgradeDiskNicMods(kind, mods, verify_fn):
13154     assert ht.TList(mods)
13155     assert not mods or len(mods[0]) in (2, 3)
13157     if mods and len(mods[0]) == 2:
13158       result = []
13159       addremove = 0
13161       for op, params in mods:
13162         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13163           result.append((op, -1, params))
13164           addremove += 1
13166           if addremove > 1:
13167             raise errors.OpPrereqError("Only one %s add or remove operation is"
13168                                        " supported at a time" % kind,
13169                                        errors.ECODE_INVAL)
13170         else:
13171           result.append((constants.DDM_MODIFY, op, params))
13173       assert verify_fn(result)
13174     else:
13175       result = mods
13177     return result
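  # Editor's note (illustrative): this upgrades the legacy two-element format
  # to the indexed three-element one, e.g.
  #   [(constants.DDM_ADD, {"size": 1024})] -> [(constants.DDM_ADD, -1, {"size": 1024})]
  #   [(0, {"mode": "ro"})]                 -> [(constants.DDM_MODIFY, 0, {"mode": "ro"})]
  # with only one add/remove per request accepted, as enforced above.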
13179   @staticmethod
13180   def _CheckMods(kind, mods, key_types, item_fn):
13181     """Ensures requested disk/NIC modifications are valid.
13183     """
13184 for (op, _, params) in mods:
13185 assert ht.TDict(params)
13187 # If 'key_types' is an empty dict, we assume we have an
13188 # 'ext' template and thus do not ForceDictType
13189       if key_types:
13190         utils.ForceDictType(params, key_types)
13192       if op == constants.DDM_REMOVE:
13193         if params:
13194           raise errors.OpPrereqError("No settings should be passed when"
13195 " removing a %s" % kind,
13196 errors.ECODE_INVAL)
13197 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13198 item_fn(op, params)
13199       else:
13200         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13202   @staticmethod
13203   def _VerifyDiskModification(op, params):
13204     """Verifies a disk modification.
13206     """
13207 if op == constants.DDM_ADD:
13208 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13209 if mode not in constants.DISK_ACCESS_SET:
13210 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13211 errors.ECODE_INVAL)
13213       size = params.get(constants.IDISK_SIZE, None)
13214       if size is None:
13215 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13216 constants.IDISK_SIZE, errors.ECODE_INVAL)
13218       try:
13219         size = int(size)
13220       except (TypeError, ValueError), err:
13221 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13222 errors.ECODE_INVAL)
13224 params[constants.IDISK_SIZE] = size
13226 elif op == constants.DDM_MODIFY:
13227 if constants.IDISK_SIZE in params:
13228 raise errors.OpPrereqError("Disk size change not possible, use"
13229 " grow-disk", errors.ECODE_INVAL)
13230 if constants.IDISK_MODE not in params:
13231 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13232 " modification supported, but missing",
13233 errors.ECODE_NOENT)
13234 if len(params) > 1:
13235 raise errors.OpPrereqError("Disk modification doesn't support"
13236 " additional arbitrary parameters",
13237 errors.ECODE_INVAL)
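  # Editor's note (illustrative): a valid DDM_ADD parameter dict therefore
  # looks like {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw"}, with
  # the mode defaulting to DISK_RDWR when omitted, while DDM_MODIFY only
  # accepts {constants.IDISK_MODE: "ro"}; size changes must use grow-disk.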
13239   @staticmethod
13240   def _VerifyNicModification(op, params):
13241     """Verifies a network interface modification.
13243     """
13244 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13245 ip = params.get(constants.INIC_IP, None)
13246 req_net = params.get(constants.INIC_NETWORK, None)
13247 link = params.get(constants.NIC_LINK, None)
13248 mode = params.get(constants.NIC_MODE, None)
13249 if req_net is not None:
13250 if req_net.lower() == constants.VALUE_NONE:
13251           params[constants.INIC_NETWORK] = None
13252           req_net = None
13253         elif link is not None or mode is not None:
13254 raise errors.OpPrereqError("If network is given"
13255 " mode or link should not",
13256 errors.ECODE_INVAL)
13258 if op == constants.DDM_ADD:
13259 macaddr = params.get(constants.INIC_MAC, None)
13260 if macaddr is None:
13261           params[constants.INIC_MAC] = constants.VALUE_AUTO
13263       if ip is not None:
13264         if ip.lower() == constants.VALUE_NONE:
13265           params[constants.INIC_IP] = None
13266         else:
13267           if ip.lower() == constants.NIC_IP_POOL:
13268 if op == constants.DDM_ADD and req_net is None:
13269 raise errors.OpPrereqError("If ip=pool, parameter network"
13270                                          " must be passed too",
13271                                          errors.ECODE_INVAL)
13272           else:
13273             if not netutils.IPAddress.IsValid(ip):
13274 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13275 errors.ECODE_INVAL)
13277 if constants.INIC_MAC in params:
13278 macaddr = params[constants.INIC_MAC]
13279 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13280 macaddr = utils.NormalizeAndValidateMac(macaddr)
13282 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13283 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13284 " modifying an existing NIC",
13285 errors.ECODE_INVAL)
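  # Editor's note (illustrative): for a NIC addition the parameters might be
  #   {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1",
  #    constants.INIC_MAC: constants.VALUE_AUTO}
  # where "pool" requires a network (as checked above) and VALUE_AUTO asks the
  # cluster to generate a MAC, which is rejected for DDM_MODIFY on an
  # existing NIC.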
13287 def CheckArguments(self):
13288 if not (self.op.nics or self.op.disks or self.op.disk_template or
13289 self.op.hvparams or self.op.beparams or self.op.os_name or
13290 self.op.offline is not None or self.op.runtime_mem):
13291 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13293 if self.op.hvparams:
13294 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13295 "hypervisor", "instance", "cluster")
13297 self.op.disks = self._UpgradeDiskNicMods(
13298 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13299 self.op.nics = self._UpgradeDiskNicMods(
13300 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13302 if self.op.disks and self.op.disk_template is not None:
13303 raise errors.OpPrereqError("Disk template conversion and other disk"
13304 " changes not supported at the same time",
13305 errors.ECODE_INVAL)
13307 if (self.op.disk_template and
13308 self.op.disk_template in constants.DTS_INT_MIRROR and
13309 self.op.remote_node is None):
13310 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13311 " one requires specifying a secondary node",
13312 errors.ECODE_INVAL)
13314 # Check NIC modifications
13315 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13316 self._VerifyNicModification)
13318 def ExpandNames(self):
13319 self._ExpandAndLockInstance()
13320 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13321 # Can't even acquire node locks in shared mode as upcoming changes in
13322 # Ganeti 2.6 will start to modify the node object on disk conversion
13323 self.needed_locks[locking.LEVEL_NODE] = []
13324 self.needed_locks[locking.LEVEL_NODE_RES] = []
13325 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13326 # Look node group to look up the ipolicy
13327 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13329 def DeclareLocks(self, level):
13330 if level == locking.LEVEL_NODEGROUP:
13331 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13332 # Acquire locks for the instance's nodegroups optimistically. Needs
13333 # to be verified in CheckPrereq
13334 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13335 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13336 elif level == locking.LEVEL_NODE:
13337 self._LockInstancesNodes()
13338 if self.op.disk_template and self.op.remote_node:
13339 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13340 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13341 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13343 self.needed_locks[locking.LEVEL_NODE_RES] = \
13344 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13346 def BuildHooksEnv(self):
13347 """Build hooks env.
13349     This runs on the master, primary and secondaries.
13351     """
13352     args = {}
13353 if constants.BE_MINMEM in self.be_new:
13354 args["minmem"] = self.be_new[constants.BE_MINMEM]
13355 if constants.BE_MAXMEM in self.be_new:
13356 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13357 if constants.BE_VCPUS in self.be_new:
13358 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13359 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13360 # information at all.
13362     if self._new_nics is not None:
13363       nics = []
13365 for nic in self._new_nics:
13366 n = copy.deepcopy(nic)
13367 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13368 n.nicparams = nicparams
13369 nics.append(_NICToTuple(self, n))
13371 args["nics"] = nics
13373 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13374 if self.op.disk_template:
13375 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13376 if self.op.runtime_mem:
13377       env["RUNTIME_MEMORY"] = self.op.runtime_mem
13379     return env
13381 def BuildHooksNodes(self):
13382     """Build hooks nodes.
13384     """
13385     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13386     return (nl, nl)
13388 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13389 old_params, cluster, pnode):
13391 update_params_dict = dict([(key, params[key])
13392                                for key in constants.NICS_PARAMETERS
13393                                if key in params])
13395 req_link = update_params_dict.get(constants.NIC_LINK, None)
13396 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13398 new_net_uuid = None
13399 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13400 if new_net_uuid_or_name:
13401 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13402 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13404     if old_net_uuid:
13405       old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13407     if new_net_uuid:
13408       netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13409       if not netparams:
13410         raise errors.OpPrereqError("No netparams found for the network"
13411                                    " %s, probably not connected" %
13412                                    new_net_obj.name, errors.ECODE_INVAL)
13413       new_params = dict(netparams)
13414     else:
13415       new_params = _GetUpdatedParams(old_params, update_params_dict)
13417 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13419 new_filled_params = cluster.SimpleFillNIC(new_params)
13420 objects.NIC.CheckParameterSyntax(new_filled_params)
13422 new_mode = new_filled_params[constants.NIC_MODE]
13423 if new_mode == constants.NIC_MODE_BRIDGED:
13424 bridge = new_filled_params[constants.NIC_LINK]
13425 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13426       if msg:
13427         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13428         if self.op.force:
13429           self.warn.append(msg)
13430         else:
13431           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13433 elif new_mode == constants.NIC_MODE_ROUTED:
13434 ip = params.get(constants.INIC_IP, old_ip)
13435       if ip is None:
13436         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13437 " on a routed NIC", errors.ECODE_INVAL)
13439 elif new_mode == constants.NIC_MODE_OVS:
13440 # TODO: check OVS link
13441 self.LogInfo("OVS links are currently not checked for correctness")
13443 if constants.INIC_MAC in params:
13444 mac = params[constants.INIC_MAC]
13446 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13447 errors.ECODE_INVAL)
13448 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13449 # otherwise generate the MAC address
13450 params[constants.INIC_MAC] = \
13451 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13452       else:
13453         # or validate/reserve the current one
13454         try:
13455           self.cfg.ReserveMAC(mac, self.proc.GetECId())
13456 except errors.ReservationError:
13457 raise errors.OpPrereqError("MAC address '%s' already in use"
13458 " in cluster" % mac,
13459 errors.ECODE_NOTUNIQUE)
13460 elif new_net_uuid != old_net_uuid:
13462       def get_net_prefix(net_uuid):
13463         mac_prefix = None
13464         if net_uuid:
13465           nobj = self.cfg.GetNetwork(net_uuid)
13466           mac_prefix = nobj.mac_prefix
13468         return mac_prefix
13470       new_prefix = get_net_prefix(new_net_uuid)
13471 old_prefix = get_net_prefix(old_net_uuid)
13472 if old_prefix != new_prefix:
13473 params[constants.INIC_MAC] = \
13474 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13476 # if there is a change in (ip, network) tuple
13477 new_ip = params.get(constants.INIC_IP, old_ip)
13478 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13479       if new_ip:
13480         # if IP is pool then require a network and generate one IP
13481         if new_ip.lower() == constants.NIC_IP_POOL:
13482           if new_net_uuid:
13483             try:
13484               new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13485             except errors.ReservationError:
13486               raise errors.OpPrereqError("Unable to get a free IP"
13487                                          " from the address pool",
13488                                          errors.ECODE_STATE)
13489             self.LogInfo("Chose IP %s from network %s",
13490                          new_ip,
13491                          new_net_obj.name)
13492             params[constants.INIC_IP] = new_ip
13493           else:
13494             raise errors.OpPrereqError("ip=pool, but no network found",
13495                                        errors.ECODE_INVAL)
13496         # Reserve the new IP if it belongs to the new network, if any
13497         elif new_net_uuid:
13498           try:
13499             self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13500             self.LogInfo("Reserving IP %s in network %s",
13501                          new_ip, new_net_obj.name)
13502           except errors.ReservationError:
13503             raise errors.OpPrereqError("IP %s not available in network %s" %
13504                                        (new_ip, new_net_obj.name),
13505                                        errors.ECODE_NOTUNIQUE)
13506         # new network is None so check if new IP is a conflicting IP
13507         elif self.op.conflicts_check:
13508           _CheckForConflictingIp(self, new_ip, pnode)
13510       # release old IP if old network is not None
13511       if old_ip and old_net_uuid:
13512         try:
13513           self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13514         except errors.AddressPoolError:
13515           logging.warning("Release IP %s not contained in network %s",
13516                           old_ip, old_net_obj.name)
13518 # there are no changes in (ip, network) tuple and old network is not None
13519 elif (old_net_uuid is not None and
13520 (req_link is not None or req_mode is not None)):
13521 raise errors.OpPrereqError("Not allowed to change link or mode of"
13522 " a NIC that is connected to a network",
13523 errors.ECODE_INVAL)
13525 private.params = new_params
13526 private.filled = new_filled_params
13528 def _PreCheckDiskTemplate(self, pnode_info):
13529 """CheckPrereq checks related to a new disk template."""
13530 # Arguments are passed to avoid configuration lookups
13531 instance = self.instance
13532 pnode = instance.primary_node
13533 cluster = self.cluster
13534 if instance.disk_template == self.op.disk_template:
13535 raise errors.OpPrereqError("Instance already has disk template %s" %
13536 instance.disk_template, errors.ECODE_INVAL)
13538 if (instance.disk_template,
13539 self.op.disk_template) not in self._DISK_CONVERSIONS:
13540 raise errors.OpPrereqError("Unsupported disk template conversion from"
13541 " %s to %s" % (instance.disk_template,
13542 self.op.disk_template),
13543 errors.ECODE_INVAL)
13544 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13545 msg="cannot change disk template")
13546 if self.op.disk_template in constants.DTS_INT_MIRROR:
13547 if self.op.remote_node == pnode:
13548 raise errors.OpPrereqError("Given new secondary node %s is the same"
13549 " as the primary node of the instance" %
13550 self.op.remote_node, errors.ECODE_STATE)
13551 _CheckNodeOnline(self, self.op.remote_node)
13552 _CheckNodeNotDrained(self, self.op.remote_node)
13553 # FIXME: here we assume that the old instance type is DT_PLAIN
13554 assert instance.disk_template == constants.DT_PLAIN
13555 disks = [{constants.IDISK_SIZE: d.size,
13556 constants.IDISK_VG: d.logical_id[0]}
13557 for d in instance.disks]
13558 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13559 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13561 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13562 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13563       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13564                                                               snode_group)
13565 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13566 ignore=self.op.ignore_ipolicy)
13567 if pnode_info.group != snode_info.group:
13568 self.LogWarning("The primary and secondary nodes are in two"
13569 " different node groups; the disk parameters"
13570 " from the first disk's node group will be"
13573 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13574 # Make sure none of the nodes require exclusive storage
13575 nodes = [pnode_info]
13576 if self.op.disk_template in constants.DTS_INT_MIRROR:
13578 nodes.append(snode_info)
13579 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13580 if compat.any(map(has_es, nodes)):
13581 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13582 " storage is enabled" % (instance.disk_template,
13583 self.op.disk_template))
13584 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13586 def CheckPrereq(self):
13587 """Check prerequisites.
13589 This only checks the instance list against the existing names.
13592 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13593 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13595 cluster = self.cluster = self.cfg.GetClusterInfo()
13596 assert self.instance is not None, \
13597 "Cannot retrieve locked instance %s" % self.op.instance_name
13599 pnode = instance.primary_node
13600 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13601 nodelist = list(instance.all_nodes)
13602 pnode_info = self.cfg.GetNodeInfo(pnode)
13603 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13605 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13606 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13607 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13609     # dictionary with instance information after the modification
13610     ispec = {}
13612 # Check disk modifications. This is done here and not in CheckArguments
13613 # (as with NICs), because we need to know the instance's disk template
13614 if instance.disk_template == constants.DT_EXT:
13615 self._CheckMods("disk", self.op.disks, {},
13616 self._VerifyDiskModification)
13617     else:
13618       self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13619 self._VerifyDiskModification)
13621 # Prepare disk/NIC modifications
13622 self.diskmod = PrepareContainerMods(self.op.disks, None)
13623 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13625 # Check the validity of the `provider' parameter
13626 if instance.disk_template in constants.DT_EXT:
13627 for mod in self.diskmod:
13628 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13629 if mod[0] == constants.DDM_ADD:
13630 if ext_provider is None:
13631 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13632 " '%s' missing, during disk add" %
13634 constants.IDISK_PROVIDER),
13635 errors.ECODE_NOENT)
13636         elif mod[0] == constants.DDM_MODIFY:
13637           if ext_provider:
13638             raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13639                                        " modification" %
13640                                        constants.IDISK_PROVIDER,
13641                                        errors.ECODE_INVAL)
13642     else:
13643       for mod in self.diskmod:
13644 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13645 if ext_provider is not None:
13646 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13647 " instances of type '%s'" %
13648 (constants.IDISK_PROVIDER,
13650 errors.ECODE_INVAL)
13653 if self.op.os_name and not self.op.force:
13654 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13655 self.op.force_variant)
13656 instance_os = self.op.os_name
13657     else:
13658       instance_os = instance.os
13660 assert not (self.op.disk_template and self.op.disks), \
13661 "Can't modify disk template and apply disk changes at the same time"
13663 if self.op.disk_template:
13664 self._PreCheckDiskTemplate(pnode_info)
13666 # hvparams processing
13667 if self.op.hvparams:
13668 hv_type = instance.hypervisor
13669 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13670 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13671 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13674 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13675 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13676 self.hv_proposed = self.hv_new = hv_new # the new actual values
13677 self.hv_inst = i_hvdict # the new dict (without defaults)
13678     else:
13679       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13680                                               instance.hvparams)
13681 self.hv_new = self.hv_inst = {}
13683 # beparams processing
13684 if self.op.beparams:
13685       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13686                                    use_none=True)
13687 objects.UpgradeBeParams(i_bedict)
13688 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13689 be_new = cluster.SimpleFillBE(i_bedict)
13690 self.be_proposed = self.be_new = be_new # the new actual values
13691 self.be_inst = i_bedict # the new dict (without defaults)
13692     else:
13693       self.be_new = self.be_inst = {}
13694 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13695 be_old = cluster.FillBE(instance)
13697 # CPU param validation -- checking every time a parameter is
13698 # changed to cover all cases where either CPU mask or vcpus have
13700 if (constants.BE_VCPUS in self.be_proposed and
13701 constants.HV_CPU_MASK in self.hv_proposed):
13702       cpu_list = \
13703         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13704 # Verify mask is consistent with number of vCPUs. Can skip this
13705 # test if only 1 entry in the CPU mask, which means same mask
13706 # is applied to all vCPUs.
13707 if (len(cpu_list) > 1 and
13708 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13709 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13711 (self.be_proposed[constants.BE_VCPUS],
13712 self.hv_proposed[constants.HV_CPU_MASK]),
13713 errors.ECODE_INVAL)
13715 # Only perform this test if a new CPU mask is given
13716 if constants.HV_CPU_MASK in self.hv_new:
13717 # Calculate the largest CPU number requested
13718 max_requested_cpu = max(map(max, cpu_list))
13719 # Check that all of the instance's nodes have enough physical CPUs to
13720 # satisfy the requested CPU mask
13721 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13722 max_requested_cpu + 1, instance.hypervisor)
13724 # osparams processing
13725 if self.op.osparams:
13726 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13727 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13728       self.os_inst = i_osdict # the new dict (without defaults)
13729     else:
13730       self.os_inst = {}
13732     self.warn = []
13734 #TODO(dynmem): do the appropriate check involving MINMEM
13735 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13736 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13737 mem_check_list = [pnode]
13738 if be_new[constants.BE_AUTO_BALANCE]:
13739 # either we changed auto_balance to yes or it was from before
13740 mem_check_list.extend(instance.secondary_nodes)
13741 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13742 instance.hypervisor)
13743 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13744 [instance.hypervisor], False)
13745 pninfo = nodeinfo[pnode]
13746 msg = pninfo.fail_msg
13747       if msg:
13748         # Assume the primary node is unreachable and go ahead
13749         self.warn.append("Can't get info from primary node %s: %s" %
13750                          (pnode, msg))
13751       else:
13752         (_, _, (pnhvinfo, )) = pninfo.payload
13753 if not isinstance(pnhvinfo.get("memory_free", None), int):
13754 self.warn.append("Node data from primary node %s doesn't contain"
13755 " free memory information" % pnode)
13756 elif instance_info.fail_msg:
13757 self.warn.append("Can't get instance runtime information: %s" %
13758 instance_info.fail_msg)
13759         else:
13760           if instance_info.payload:
13761             current_mem = int(instance_info.payload["memory"])
13762           else:
13763             # Assume instance not running
13764             # (there is a slight race condition here, but it's not very
13765             # probable, and we have no other way to check)
13766             # TODO: Describe race condition
13767             current_mem = 0
13768           #TODO(dynmem): do the appropriate check involving MINMEM
13769           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13770                       pnhvinfo["memory_free"])
13771           if miss_mem > 0:
13772             raise errors.OpPrereqError("This change will prevent the instance"
13773                                        " from starting, due to %d MB of memory"
13774                                        " missing on its primary node" %
13775                                        miss_mem, errors.ECODE_NORES)
13777 if be_new[constants.BE_AUTO_BALANCE]:
13778 for node, nres in nodeinfo.items():
13779           if node not in instance.secondary_nodes:
13780             continue
13781 nres.Raise("Can't get info from secondary node %s" % node,
13782 prereq=True, ecode=errors.ECODE_STATE)
13783 (_, _, (nhvinfo, )) = nres.payload
13784 if not isinstance(nhvinfo.get("memory_free", None), int):
13785 raise errors.OpPrereqError("Secondary node %s didn't return free"
13786 " memory information" % node,
13787 errors.ECODE_STATE)
13788 #TODO(dynmem): do the appropriate check involving MINMEM
13789 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13790 raise errors.OpPrereqError("This change will prevent the instance"
13791 " from failover to its secondary node"
13792 " %s, due to not enough memory" % node,
13793 errors.ECODE_STATE)
13795 if self.op.runtime_mem:
13796       remote_info = self.rpc.call_instance_info(instance.primary_node,
13797                                                 instance.name,
13798 instance.hypervisor)
13799 remote_info.Raise("Error checking node %s" % instance.primary_node)
13800 if not remote_info.payload: # not running already
13801 raise errors.OpPrereqError("Instance %s is not running" %
13802 instance.name, errors.ECODE_STATE)
13804 current_memory = remote_info.payload["memory"]
13805 if (not self.op.force and
13806 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13807 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13808 raise errors.OpPrereqError("Instance %s must have memory between %d"
13809 " and %d MB of memory unless --force is"
13812 self.be_proposed[constants.BE_MINMEM],
13813 self.be_proposed[constants.BE_MAXMEM]),
13814 errors.ECODE_INVAL)
13816 delta = self.op.runtime_mem - current_memory
13817       if delta > 0:
13818         _CheckNodeFreeMemory(self, instance.primary_node,
13819                              "ballooning memory for instance %s" %
13820                              instance.name, delta, instance.hypervisor)
13822 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13823 raise errors.OpPrereqError("Disk operations not supported for"
13824 " diskless instances", errors.ECODE_INVAL)
13826 def _PrepareNicCreate(_, params, private):
13827 self._PrepareNicModification(params, private, None, None,
13828 {}, cluster, pnode)
13829 return (None, None)
13831 def _PrepareNicMod(_, nic, params, private):
13832 self._PrepareNicModification(params, private, nic.ip, nic.network,
13833 nic.nicparams, cluster, pnode)
13836 def _PrepareNicRemove(_, params, __):
13837       ip = params.ip
13838       net = params.network
13839 if net is not None and ip is not None:
13840 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13842 # Verify NIC changes (operating on copy)
13843 nics = instance.nics[:]
13844 ApplyContainerMods("NIC", nics, None, self.nicmod,
13845 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13846 if len(nics) > constants.MAX_NICS:
13847 raise errors.OpPrereqError("Instance has too many network interfaces"
13848 " (%d), cannot add more" % constants.MAX_NICS,
13849 errors.ECODE_STATE)
13851 # Verify disk changes (operating on a copy)
13852 disks = instance.disks[:]
13853 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13854 if len(disks) > constants.MAX_DISKS:
13855 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13856 " more" % constants.MAX_DISKS,
13857 errors.ECODE_STATE)
13858 disk_sizes = [disk.size for disk in instance.disks]
13859 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13860 self.diskmod if op == constants.DDM_ADD)
13861 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13862 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13864 if self.op.offline is not None and self.op.offline:
13865 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13866 msg="can't change to offline")
13868 # Pre-compute NIC changes (necessary to use result in hooks)
13869 self._nic_chgdesc = []
13870     if self.nicmod:
13871       # Operate on copies as this is still in prereq
13872 nics = [nic.Copy() for nic in instance.nics]
13873 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13874 self._CreateNewNic, self._ApplyNicMods, None)
13875 self._new_nics = nics
13876 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13877     else:
13878       self._new_nics = None
13879 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13881 if not self.op.ignore_ipolicy:
13882       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13883                                                               group_info)
13885 # Fill ispec with backend parameters
13886 ispec[constants.ISPEC_SPINDLE_USE] = \
13887 self.be_new.get(constants.BE_SPINDLE_USE, None)
13888       ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13889                                                          None)
13891 # Copy ispec to verify parameters with min/max values separately
13892 ispec_max = ispec.copy()
13893 ispec_max[constants.ISPEC_MEM_SIZE] = \
13894 self.be_new.get(constants.BE_MAXMEM, None)
13895 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13896 ispec_min = ispec.copy()
13897 ispec_min[constants.ISPEC_MEM_SIZE] = \
13898 self.be_new.get(constants.BE_MINMEM, None)
13899 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13901 if (res_max or res_min):
13902 # FIXME: Improve error message by including information about whether
13903 # the upper or lower limit of the parameter fails the ipolicy.
13904 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13905 (group_info, group_info.name,
13906 utils.CommaJoin(set(res_max + res_min))))
13907 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
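  # Editor's note (illustrative): at this point "ispec" describes the instance
  # as it would look after the modification, roughly
  #   {constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_DISK_COUNT: 2,
  #    constants.ISPEC_DISK_SIZE: [1024, 2048], constants.ISPEC_NIC_COUNT: 1,
  #    constants.ISPEC_SPINDLE_USE: None}
  # and it is checked twice against the group ipolicy, once with BE_MINMEM and
  # once with BE_MAXMEM substituted for the memory size; the values shown are
  # hypothetical.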
13909 def _ConvertPlainToDrbd(self, feedback_fn):
13910 """Converts an instance from plain to drbd.
13913 feedback_fn("Converting template to drbd")
13914 instance = self.instance
13915 pnode = instance.primary_node
13916 snode = self.op.remote_node
13918 assert instance.disk_template == constants.DT_PLAIN
13920 # create a fake disk info for _GenerateDiskTemplate
13921 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13922 constants.IDISK_VG: d.logical_id[0]}
13923 for d in instance.disks]
13924 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13925 instance.name, pnode, [snode],
13926                                       disk_info, None, None, 0, feedback_fn,
13927                                       self.diskparams)
13928     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13929                                         self.diskparams)
13930 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13931 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13932 info = _GetInstanceInfoText(instance)
13933 feedback_fn("Creating additional volumes...")
13934 # first, create the missing data and meta devices
13935 for disk in anno_disks:
13936 # unfortunately this is... not too nice
13937 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13938 info, True, p_excl_stor)
13939 for child in disk.children:
13940 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13942 # at this stage, all new LVs have been created, we can rename the
13944 feedback_fn("Renaming original volumes...")
13945 rename_list = [(o, n.children[0].logical_id)
13946 for (o, n) in zip(instance.disks, new_disks)]
13947 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13948 result.Raise("Failed to rename original LVs")
13950 feedback_fn("Initializing DRBD devices...")
13951 # all child devices are in place, we can now create the DRBD devices
13952 for disk in anno_disks:
13953 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13954 f_create = node == pnode
13955         _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13956                               excl_stor)
13958 # at this point, the instance has been modified
13959 instance.disk_template = constants.DT_DRBD8
13960 instance.disks = new_disks
13961 self.cfg.Update(instance, feedback_fn)
13963 # Release node locks while waiting for sync
13964 _ReleaseLocks(self, locking.LEVEL_NODE)
13966 # disks are created, waiting for sync
13967 disk_abort = not _WaitForSync(self, instance,
13968 oneshot=not self.op.wait_for_sync)
13969     if disk_abort:
13970       raise errors.OpExecError("There are some degraded disks for"
13971 " this instance, please cleanup manually")
13973 # Node resource locks will be released by caller
13975 def _ConvertDrbdToPlain(self, feedback_fn):
13976 """Converts an instance from drbd to plain.
13979 instance = self.instance
13981 assert len(instance.secondary_nodes) == 1
13982 assert instance.disk_template == constants.DT_DRBD8
13984 pnode = instance.primary_node
13985 snode = instance.secondary_nodes[0]
13986 feedback_fn("Converting template to plain")
13988 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13989 new_disks = [d.children[0] for d in instance.disks]
13991 # copy over size and mode
13992 for parent, child in zip(old_disks, new_disks):
13993 child.size = parent.size
13994 child.mode = parent.mode
13996 # this is a DRBD disk, return its port to the pool
13997 # NOTE: this must be done right before the call to cfg.Update!
13998 for disk in old_disks:
13999 tcp_port = disk.logical_id[2]
14000 self.cfg.AddTcpUdpPort(tcp_port)
14002 # update instance structure
14003 instance.disks = new_disks
14004 instance.disk_template = constants.DT_PLAIN
14005 self.cfg.Update(instance, feedback_fn)
14007 # Release locks in case removing disks takes a while
14008 _ReleaseLocks(self, locking.LEVEL_NODE)
14010 feedback_fn("Removing volumes on the secondary node...")
14011 for disk in old_disks:
14012 self.cfg.SetDiskID(disk, snode)
14013 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14014       if msg:
14015         self.LogWarning("Could not remove block device %s on node %s,"
14016 " continuing anyway: %s", disk.iv_name, snode, msg)
14018 feedback_fn("Removing unneeded volumes on the primary node...")
14019 for idx, disk in enumerate(old_disks):
14020 meta = disk.children[1]
14021 self.cfg.SetDiskID(meta, pnode)
14022 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14023       if msg:
14024         self.LogWarning("Could not remove metadata for disk %d on node %s,"
14025 " continuing anyway: %s", idx, pnode, msg)
14027 def _CreateNewDisk(self, idx, params, _):
14028 """Creates a new disk.
14031 instance = self.instance
14034 if instance.disk_template in constants.DTS_FILEBASED:
14035 (file_driver, file_path) = instance.disks[0].logical_id
14036 file_path = os.path.dirname(file_path)
14037     else:
14038       file_driver = file_path = None
14040     disk = \
14041       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14042 instance.primary_node, instance.secondary_nodes,
14043 [params], file_path, file_driver, idx,
14044 self.Log, self.diskparams)[0]
14046 info = _GetInstanceInfoText(instance)
14048 logging.info("Creating volume %s for instance %s",
14049 disk.iv_name, instance.name)
14050 # Note: this needs to be kept in sync with _CreateDisks
14052 for node in instance.all_nodes:
14053 f_create = (node == instance.primary_node)
14054       try:
14055         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14056 except errors.OpExecError, err:
14057 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14058 disk.iv_name, disk, node, err)
14060     return (disk, [
14061       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14062       ])
14064   @staticmethod
14065   def _ModifyDisk(idx, disk, params, _):
14066     """Modifies a disk.
14068     """
14069     disk.mode = params[constants.IDISK_MODE]
14071     return [
14072       ("disk.mode/%d" % idx, disk.mode),
14073       ]
14075 def _RemoveDisk(self, idx, root, _):
14079 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14080 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14081 self.cfg.SetDiskID(disk, node)
14082 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14084 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14085 " continuing anyway", idx, node, msg)
14087 # if this is a DRBD disk, return its port to the pool
14088 if root.dev_type in constants.LDS_DRBD:
14089 self.cfg.AddTcpUdpPort(root.logical_id[2])
14091 def _CreateNewNic(self, idx, params, private):
14092 """Creates data structure for a new network interface.
14095 mac = params[constants.INIC_MAC]
14096 ip = params.get(constants.INIC_IP, None)
14097 net = params.get(constants.INIC_NETWORK, None)
14098 net_uuid = self.cfg.LookupNetwork(net)
14099 #TODO: not private.filled?? can a nic have no nicparams??
14100 nicparams = private.filled
14101 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
14103     return (nobj, [
14104       ("nic.%d" % idx,
14105        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14106        (mac, ip, private.filled[constants.NIC_MODE],
14107         private.filled[constants.NIC_LINK],
14108         net)),
14109       ])
14111 def _ApplyNicMods(self, idx, nic, params, private):
14112     """Modifies a network interface.
14114     """
14115     changes = []
14117 for key in [constants.INIC_MAC, constants.INIC_IP]:
14118       if key in params:
14119         changes.append(("nic.%s/%d" % (key, idx), params[key]))
14120         setattr(nic, key, params[key])
14122 new_net = params.get(constants.INIC_NETWORK, nic.network)
14123 new_net_uuid = self.cfg.LookupNetwork(new_net)
14124 if new_net_uuid != nic.network:
14125 changes.append(("nic.network/%d" % idx, new_net))
14126 nic.network = new_net_uuid
14128     if private.filled:
14129       nic.nicparams = private.filled
14131 for (key, val) in nic.nicparams.items():
14132         changes.append(("nic.%s/%d" % (key, idx), val))
14134     return changes
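  # Editor's note (illustrative): the change descriptions accumulated above
  # are (field, value) pairs such as
  #   [("nic.mac/0", "aa:bb:cc:dd:ee:ff"), ("nic.network/0", "net1"),
  #    ("nic.mode/0", "bridged")]
  # and reach the opcode result through self._nic_chgdesc in Exec below; the
  # concrete values are hypothetical.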
14136 def Exec(self, feedback_fn):
14137 """Modifies an instance.
14139 All parameters take effect only at the next restart of the instance.
14142 # Process here the warnings from CheckPrereq, as we don't have a
14143 # feedback_fn there.
14144 # TODO: Replace with self.LogWarning
14145 for warn in self.warn:
14146 feedback_fn("WARNING: %s" % warn)
14148 assert ((self.op.disk_template is None) ^
14149 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14150 "Not owning any node resource locks"
14152     result = []
14153     instance = self.instance
14156 if self.op.runtime_mem:
14157 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14159 self.op.runtime_mem)
14160 rpcres.Raise("Cannot modify instance runtime memory")
14161 result.append(("runtime_memory", self.op.runtime_mem))
14163 # Apply disk changes
14164 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14165 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14166 _UpdateIvNames(0, instance.disks)
14168 if self.op.disk_template:
14170 check_nodes = set(instance.all_nodes)
14171 if self.op.remote_node:
14172 check_nodes.add(self.op.remote_node)
14173 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14174 owned = self.owned_locks(level)
14175 assert not (check_nodes - owned), \
14176 ("Not owning the correct locks, owning %r, expected at least %r" %
14177 (owned, check_nodes))
14179 r_shut = _ShutdownInstanceDisks(self, instance)
14180       if not r_shut:
14181         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14182 " proceed with disk template conversion")
14183       mode = (instance.disk_template, self.op.disk_template)
14184       try:
14185         self._DISK_CONVERSIONS[mode](self, feedback_fn)
14186       except:
14187         self.cfg.ReleaseDRBDMinors(instance.name)
14188         raise
14189       result.append(("disk_template", self.op.disk_template))
14191 assert instance.disk_template == self.op.disk_template, \
14192 ("Expected disk template '%s', found '%s'" %
14193 (self.op.disk_template, instance.disk_template))
14195 # Release node and resource locks if there are any (they might already have
14196 # been released during disk conversion)
14197 _ReleaseLocks(self, locking.LEVEL_NODE)
14198 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14200 # Apply NIC changes
14201 if self._new_nics is not None:
14202 instance.nics = self._new_nics
14203 result.extend(self._nic_chgdesc)
14206 if self.op.hvparams:
14207 instance.hvparams = self.hv_inst
14208 for key, val in self.op.hvparams.iteritems():
14209 result.append(("hv/%s" % key, val))
14212 if self.op.beparams:
14213 instance.beparams = self.be_inst
14214 for key, val in self.op.beparams.iteritems():
14215 result.append(("be/%s" % key, val))
14218 if self.op.os_name:
14219 instance.os = self.op.os_name
14222 if self.op.osparams:
14223 instance.osparams = self.os_inst
14224 for key, val in self.op.osparams.iteritems():
14225 result.append(("os/%s" % key, val))
14227     if self.op.offline is None:
14228       # Ignore
14229       pass
14230 elif self.op.offline:
14231 # Mark instance as offline
14232 self.cfg.MarkInstanceOffline(instance.name)
14233 result.append(("admin_state", constants.ADMINST_OFFLINE))
14234     else:
14235       # Mark instance as online, but stopped
14236 self.cfg.MarkInstanceDown(instance.name)
14237 result.append(("admin_state", constants.ADMINST_DOWN))
14239 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14241 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14242 self.owned_locks(locking.LEVEL_NODE)), \
14243       "All node locks should have been released by now"
14245     return result
14247 _DISK_CONVERSIONS = {
14248 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14249     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14250     }
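  # Editor's note (illustrative): _DISK_CONVERSIONS only covers the
  # plain<->drbd pair, which is why _PreCheckDiskTemplate rejects any other
  # template combination; likewise, an opcode that both converts the template
  # and changes disks is rejected in CheckArguments, so such operations have
  # to be submitted as two separate OpInstanceSetParams opcodes.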
14253 class LUInstanceChangeGroup(LogicalUnit):
14254 HPATH = "instance-change-group"
14255 HTYPE = constants.HTYPE_INSTANCE
14258 def ExpandNames(self):
14259 self.share_locks = _ShareAll()
14261 self.needed_locks = {
14262 locking.LEVEL_NODEGROUP: [],
14263 locking.LEVEL_NODE: [],
14264       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14265       }
14267 self._ExpandAndLockInstance()
14269 if self.op.target_groups:
14270 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14271 self.op.target_groups)
14272     else:
14273       self.req_target_uuids = None
14275 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14277 def DeclareLocks(self, level):
14278 if level == locking.LEVEL_NODEGROUP:
14279 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14281 if self.req_target_uuids:
14282 lock_groups = set(self.req_target_uuids)
14284 # Lock all groups used by instance optimistically; this requires going
14285 # via the node before it's locked, requiring verification later on
14286 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14287 lock_groups.update(instance_groups)
14288       else:
14289         # No target groups, need to lock all of them
14290 lock_groups = locking.ALL_SET
14292 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14294 elif level == locking.LEVEL_NODE:
14295 if self.req_target_uuids:
14296 # Lock all nodes used by instances
14297 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14298 self._LockInstancesNodes()
14300 # Lock all nodes in all potential target groups
14301 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14302 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14303 member_nodes = [node_name
14304 for group in lock_groups
14305 for node_name in self.cfg.GetNodeGroup(group).members]
14306 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14307       else:
14308         # Lock all nodes as all groups are potential targets
14309 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14311 def CheckPrereq(self):
14312 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14313 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14314 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14316 assert (self.req_target_uuids is None or
14317 owned_groups.issuperset(self.req_target_uuids))
14318 assert owned_instances == set([self.op.instance_name])
14320 # Get instance information
14321 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14323 # Check if node groups for locked instance are still correct
14324 assert owned_nodes.issuperset(self.instance.all_nodes), \
14325 ("Instance %s's nodes changed while we kept the lock" %
14326 self.op.instance_name)
14328     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14329                                            owned_groups)
14331 if self.req_target_uuids:
14332 # User requested specific target groups
14333 self.target_uuids = frozenset(self.req_target_uuids)
14334     else:
14335       # All groups except those used by the instance are potential targets
14336 self.target_uuids = owned_groups - inst_groups
14338 conflicting_groups = self.target_uuids & inst_groups
14339 if conflicting_groups:
14340 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14341 " used by the instance '%s'" %
14342 (utils.CommaJoin(conflicting_groups),
14343 self.op.instance_name),
14344 errors.ECODE_INVAL)
14346 if not self.target_uuids:
14347 raise errors.OpPrereqError("There are no possible target groups",
14348 errors.ECODE_INVAL)
14350 def BuildHooksEnv(self):
14351     """Build hooks env.
14353     """
14354     assert self.target_uuids
14356     env = {
14357       "TARGET_GROUPS": " ".join(self.target_uuids),
14358       }
14360     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14362     return env
14364 def BuildHooksNodes(self):
14365 """Build hooks nodes.
14368 mn = self.cfg.GetMasterNode()
14369 return ([mn], [mn])
14371 def Exec(self, feedback_fn):
14372 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14374 assert instances == [self.op.instance_name], "Instance not locked"
14376 req = iallocator.IAReqGroupChange(instances=instances,
14377 target_groups=list(self.target_uuids))
14378 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14380 ial.Run(self.op.iallocator)
14382 if not ial.success:
14383 raise errors.OpPrereqError("Can't compute solution for changing group of"
14384 " instance '%s' using iallocator '%s': %s" %
14385 (self.op.instance_name, self.op.iallocator,
14386 ial.info), errors.ECODE_NORES)
14388 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14390 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14391 " instance '%s'", len(jobs), self.op.instance_name)
14393 return ResultWithJobs(jobs)
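# Illustrative sketch (hypothetical instance and group values) of the pattern
# used by Exec() above:
#   req = iallocator.IAReqGroupChange(instances=["inst1.example.com"],
#                                     target_groups=["<target-group-uuid>"])
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)
#   ial.Run(self.op.iallocator)
#   jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
# The resulting job definitions are handed back via ResultWithJobs.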
14396 class LUBackupQuery(NoHooksLU):
14397 """Query the exports list
14402 def CheckArguments(self):
14403 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14404 ["node", "export"], self.op.use_locking)
14406 def ExpandNames(self):
14407 self.expq.ExpandNames(self)
14409 def DeclareLocks(self, level):
14410 self.expq.DeclareLocks(self, level)
14412 def Exec(self, feedback_fn):
14415 for (node, expname) in self.expq.OldStyleQuery(self):
14416 if expname is None:
14417 result[node] = False
14419 result.setdefault(node, []).append(expname)
14424 class _ExportQuery(_QueryBase):
14425 FIELDS = query.EXPORT_FIELDS
14427 #: The node name is not a unique key for this query
14428 SORT_FIELD = "node"
14430 def ExpandNames(self, lu):
14431 lu.needed_locks = {}
14433 # The following variables interact with _QueryBase._GetNames
14435 self.wanted = _GetWantedNodes(lu, self.names)
14437 self.wanted = locking.ALL_SET
14439 self.do_locking = self.use_locking
14441 if self.do_locking:
14442 lu.share_locks = _ShareAll()
14443 lu.needed_locks = {
14444 locking.LEVEL_NODE: self.wanted,
14448 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14450 def DeclareLocks(self, lu, level):
14453 def _GetQueryData(self, lu):
14454 """Computes the list of nodes and their attributes.
14457 # Locking is not used
14459 assert not (compat.any(lu.glm.is_owned(level)
14460 for level in locking.LEVELS
14461 if level != locking.LEVEL_CLUSTER) or
14462 self.do_locking or self.use_locking)
14464 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14468 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14470 result.append((node, None))
14472 result.extend((node, expname) for expname in nres.payload)
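# Sketch of the query output assembled above (node and export names are
# hypothetical): each node contributes (node, export_name) tuples, and a node
# whose export list could not be retrieved is represented as (node, None):
#   [("node1.example.com", "inst1.example.com"), ("node2.example.com", None)]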
14477 class LUBackupPrepare(NoHooksLU):
14478 """Prepares an instance for an export and returns useful information.
14483 def ExpandNames(self):
14484 self._ExpandAndLockInstance()
14486 def CheckPrereq(self):
14487 """Check prerequisites.
14490 instance_name = self.op.instance_name
14492 self.instance = self.cfg.GetInstanceInfo(instance_name)
14493 assert self.instance is not None, \
14494 "Cannot retrieve locked instance %s" % self.op.instance_name
14495 _CheckNodeOnline(self, self.instance.primary_node)
14497 self._cds = _GetClusterDomainSecret()
14499 def Exec(self, feedback_fn):
14500 """Prepares an instance for an export.
14503 instance = self.instance
14505 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14506 salt = utils.GenerateSecret(8)
14508 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14509 result = self.rpc.call_x509_cert_create(instance.primary_node,
14510 constants.RIE_CERT_VALIDITY)
14511 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14513 (name, cert_pem) = result.payload
14515 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14519 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14520 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14522 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14528 class LUBackupExport(LogicalUnit):
14529 """Export an instance to an image in the cluster.
14532 HPATH = "instance-export"
14533 HTYPE = constants.HTYPE_INSTANCE
14536 def CheckArguments(self):
14537 """Check the arguments.
14540 self.x509_key_name = self.op.x509_key_name
14541 self.dest_x509_ca_pem = self.op.destination_x509_ca
14543 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14544 if not self.x509_key_name:
14545 raise errors.OpPrereqError("Missing X509 key name for encryption",
14546 errors.ECODE_INVAL)
14548 if not self.dest_x509_ca_pem:
14549 raise errors.OpPrereqError("Missing destination X509 CA",
14550 errors.ECODE_INVAL)
14552 def ExpandNames(self):
14553 self._ExpandAndLockInstance()
14555 # Lock all nodes for local exports
14556 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14557 # FIXME: lock only instance primary and destination node
14559 # Sad but true, for now we have to lock all nodes, as we don't know where
14560 # the previous export might be, and in this LU we search for it and
14561 # remove it from its current node. In the future we could fix this by:
14562 # - making a tasklet to search (share-lock all), then create the
14563 # new one, then one to remove, after
14564 # - removing the removal operation altogether
14565 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14567 # Allocations should be stopped while this LU runs with node locks, but
14568 # it doesn't have to be exclusive
14569 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14570 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14572 def DeclareLocks(self, level):
14573 """Last minute lock declaration."""
14574 # All nodes are locked anyway, so nothing to do here.
14576 def BuildHooksEnv(self):
14577 """Build hooks env.
14579 This will run on the master, primary node and target node.
14583 "EXPORT_MODE": self.op.mode,
14584 "EXPORT_NODE": self.op.target_node,
14585 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14586 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14587 # TODO: Generic function for boolean env variables
14588 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14591 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14595 def BuildHooksNodes(self):
14596 """Build hooks nodes.
14599 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14601 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14602 nl.append(self.op.target_node)
14606 def CheckPrereq(self):
14607 """Check prerequisites.
14609 This checks that the instance and node names are valid.
14612 instance_name = self.op.instance_name
14614 self.instance = self.cfg.GetInstanceInfo(instance_name)
14615 assert self.instance is not None, \
14616 "Cannot retrieve locked instance %s" % self.op.instance_name
14617 _CheckNodeOnline(self, self.instance.primary_node)
14619 if (self.op.remove_instance and
14620 self.instance.admin_state == constants.ADMINST_UP and
14621 not self.op.shutdown):
14622 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14623 " down first", errors.ECODE_STATE)
14625 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14626 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14627 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14628 assert self.dst_node is not None
14630 _CheckNodeOnline(self, self.dst_node.name)
14631 _CheckNodeNotDrained(self, self.dst_node.name)
14634 self.dest_disk_info = None
14635 self.dest_x509_ca = None
14637 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14638 self.dst_node = None
14640 if len(self.op.target_node) != len(self.instance.disks):
14641 raise errors.OpPrereqError(("Received destination information for %s"
14642 " disks, but instance %s has %s disks") %
14643 (len(self.op.target_node), instance_name,
14644 len(self.instance.disks)),
14645 errors.ECODE_INVAL)
14647 cds = _GetClusterDomainSecret()
14649 # Check X509 key name
14651 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14652 except (TypeError, ValueError), err:
14653 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14654 errors.ECODE_INVAL)
14656 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14657 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14658 errors.ECODE_INVAL)
14660 # Load and verify CA
14662 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14663 except OpenSSL.crypto.Error, err:
14664 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14665 (err, ), errors.ECODE_INVAL)
14667 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14668 if errcode is not None:
14669 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14670 (msg, ), errors.ECODE_INVAL)
14672 self.dest_x509_ca = cert
14674 # Verify target information
14676 for idx, disk_data in enumerate(self.op.target_node):
14678 (host, port, magic) = \
14679 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14680 except errors.GenericError, err:
14681 raise errors.OpPrereqError("Target info for disk %s: %s" %
14682 (idx, err), errors.ECODE_INVAL)
14684 disk_info.append((host, port, magic))
14686 assert len(disk_info) == len(self.op.target_node)
14687 self.dest_disk_info = disk_info
14690 raise errors.ProgrammerError("Unhandled export mode %r" %
14693 # instance disk type verification
14694 # TODO: Implement export support for file-based disks
14695 for disk in self.instance.disks:
14696 if disk.dev_type == constants.LD_FILE:
14697 raise errors.OpPrereqError("Export not supported for instances with"
14698 " file-based disks", errors.ECODE_INVAL)
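# Note on the remote-mode checks above (descriptive sketch): for
# EXPORT_MODE_REMOTE, self.op.target_node is not a node name but a list with
# one entry per instance disk; each entry is validated against the cluster
# domain secret and unpacked into a (host, port, magic) tuple collected in
# self.dest_disk_info, e.g. roughly [("203.0.113.10", 11000, "<magic>"), ...].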
14700 def _CleanupExports(self, feedback_fn):
14701 """Removes exports of current instance from all other nodes.
14703 If an instance in a cluster with nodes A..D was exported to node C, its
14704 exports will be removed from the nodes A, B and D.
14707 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14709 nodelist = self.cfg.GetNodeList()
14710 nodelist.remove(self.dst_node.name)
14712 # on one-node clusters nodelist will be empty after the removal
14713 # if we proceed the backup would be removed because OpBackupQuery
14714 # substitutes an empty list with the full cluster node list.
14715 iname = self.instance.name
14717 feedback_fn("Removing old exports for instance %s" % iname)
14718 exportlist = self.rpc.call_export_list(nodelist)
14719 for node in exportlist:
14720 if exportlist[node].fail_msg:
14722 if iname in exportlist[node].payload:
14723 msg = self.rpc.call_export_remove(node, iname).fail_msg
14725 self.LogWarning("Could not remove older export for instance %s"
14726 " on node %s: %s", iname, node, msg)
14728 def Exec(self, feedback_fn):
14729 """Export an instance to an image in the cluster.
14732 assert self.op.mode in constants.EXPORT_MODES
14734 instance = self.instance
14735 src_node = instance.primary_node
14737 if self.op.shutdown:
14738 # shutdown the instance, but not the disks
14739 feedback_fn("Shutting down instance %s" % instance.name)
14740 result = self.rpc.call_instance_shutdown(src_node, instance,
14741 self.op.shutdown_timeout)
14742 # TODO: Maybe ignore failures if ignore_remove_failures is set
14743 result.Raise("Could not shutdown instance %s on"
14744 " node %s" % (instance.name, src_node))
14746 # set the disk IDs correctly since call_instance_start needs the
14747 # correct drbd minor to create the symlinks
14748 for disk in instance.disks:
14749 self.cfg.SetDiskID(disk, src_node)
14751 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14754 # Activate the instance disks if we're exporting a stopped instance
14755 feedback_fn("Activating disks for %s" % instance.name)
14756 _StartInstanceDisks(self, instance, None)
14759 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14762 helper.CreateSnapshots()
14764 if (self.op.shutdown and
14765 instance.admin_state == constants.ADMINST_UP and
14766 not self.op.remove_instance):
14767 assert not activate_disks
14768 feedback_fn("Starting instance %s" % instance.name)
14769 result = self.rpc.call_instance_start(src_node,
14770 (instance, None, None), False)
14771 msg = result.fail_msg
14773 feedback_fn("Failed to start instance: %s" % msg)
14774 _ShutdownInstanceDisks(self, instance)
14775 raise errors.OpExecError("Could not start instance: %s" % msg)
14777 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14778 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14779 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14780 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14781 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14783 (key_name, _, _) = self.x509_key_name
14786 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14789 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14790 key_name, dest_ca_pem,
14795 # Check for backwards compatibility
14796 assert len(dresults) == len(instance.disks)
14797 assert compat.all(isinstance(i, bool) for i in dresults), \
14798 "Not all results are boolean: %r" % dresults
14802 feedback_fn("Deactivating disks for %s" % instance.name)
14803 _ShutdownInstanceDisks(self, instance)
14805 if not (compat.all(dresults) and fin_resu):
14808 failures.append("export finalization")
14809 if not compat.all(dresults):
14810 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14812 failures.append("disk export: disk(s) %s" % fdsk)
14814 raise errors.OpExecError("Export failed, errors in %s" %
14815 utils.CommaJoin(failures))
14817 # At this point, the export was successful, we can cleanup/finish
14819 # Remove instance if requested
14820 if self.op.remove_instance:
14821 feedback_fn("Removing instance %s" % instance.name)
14822 _RemoveInstance(self, feedback_fn, instance,
14823 self.op.ignore_remove_failures)
14825 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14826 self._CleanupExports(feedback_fn)
14828 return fin_resu, dresults
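# Shape of the value returned by Exec() (sketch): a (fin_resu, dresults) pair
# where fin_resu reports export finalization and dresults holds one boolean
# per instance disk, e.g. (True, [True, True]) for a clean two-disk export.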
14831 class LUBackupRemove(NoHooksLU):
14832 """Remove exports related to the named instance.
14837 def ExpandNames(self):
14838 self.needed_locks = {
14839 # We need all nodes to be locked in order for RemoveExport to work, but
14840 # we don't need to lock the instance itself, as nothing will happen to it
14841 # (and we can remove exports also for a removed instance)
14842 locking.LEVEL_NODE: locking.ALL_SET,
14844 # Removing backups is quick, so blocking allocations is justified
14845 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14848 # Allocations should be stopped while this LU runs with node locks, but it
14849 # doesn't have to be exclusive
14850 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14852 def Exec(self, feedback_fn):
14853 """Remove any export.
14856 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14857 # If the instance was not found we'll try with the name that was passed in.
14858 # This will only work if it was an FQDN, though.
14860 if not instance_name:
14862 instance_name = self.op.instance_name
14864 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14865 exportlist = self.rpc.call_export_list(locked_nodes)
14867 for node in exportlist:
14868 msg = exportlist[node].fail_msg
14870 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14872 if instance_name in exportlist[node].payload:
14874 result = self.rpc.call_export_remove(node, instance_name)
14875 msg = result.fail_msg
14877 logging.error("Could not remove export for instance %s"
14878 " on node %s: %s", instance_name, node, msg)
14880 if fqdn_warn and not found:
14881 feedback_fn("Export not found. If trying to remove an export belonging"
14882 " to a deleted instance please use its Fully Qualified Domain Name.")
14886 class LUGroupAdd(LogicalUnit):
14887 """Logical unit for creating node groups.
14890 HPATH = "group-add"
14891 HTYPE = constants.HTYPE_GROUP
14894 def ExpandNames(self):
14895 # We need the new group's UUID here so that we can create and acquire the
14896 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14897 # that it should not check whether the UUID exists in the configuration.
14898 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14899 self.needed_locks = {}
14900 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14902 def CheckPrereq(self):
14903 """Check prerequisites.
14905 This checks that the given group name is not an existing node group
14910 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14911 except errors.OpPrereqError:
14914 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14915 " node group (UUID: %s)" %
14916 (self.op.group_name, existing_uuid),
14917 errors.ECODE_EXISTS)
14919 if self.op.ndparams:
14920 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14922 if self.op.hv_state:
14923 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14925 self.new_hv_state = None
14927 if self.op.disk_state:
14928 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14930 self.new_disk_state = None
14932 if self.op.diskparams:
14933 for templ in constants.DISK_TEMPLATES:
14934 if templ in self.op.diskparams:
14935 utils.ForceDictType(self.op.diskparams[templ],
14936 constants.DISK_DT_TYPES)
14937 self.new_diskparams = self.op.diskparams
14939 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14940 except errors.OpPrereqError, err:
14941 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14942 errors.ECODE_INVAL)
14944 self.new_diskparams = {}
14946 if self.op.ipolicy:
14947 cluster = self.cfg.GetClusterInfo()
14948 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14950 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14951 except errors.ConfigurationError, err:
14952 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14953 errors.ECODE_INVAL)
14955 def BuildHooksEnv(self):
14956 """Build hooks env.
14960 "GROUP_NAME": self.op.group_name,
14963 def BuildHooksNodes(self):
14964 """Build hooks nodes.
14967 mn = self.cfg.GetMasterNode()
14968 return ([mn], [mn])
14970 def Exec(self, feedback_fn):
14971 """Add the node group to the cluster.
14974 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14975 uuid=self.group_uuid,
14976 alloc_policy=self.op.alloc_policy,
14977 ndparams=self.op.ndparams,
14978 diskparams=self.new_diskparams,
14979 ipolicy=self.op.ipolicy,
14980 hv_state_static=self.new_hv_state,
14981 disk_state_static=self.new_disk_state)
14983 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14984 del self.remove_locks[locking.LEVEL_NODEGROUP]
14987 class LUGroupAssignNodes(NoHooksLU):
14988 """Logical unit for assigning nodes to groups.
14993 def ExpandNames(self):
14994 # These raise errors.OpPrereqError on their own:
14995 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14996 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14998 # We want to lock all the affected nodes and groups. We have readily
14999 # available the list of nodes, and the *destination* group. To gather the
15000 # list of "source" groups, we need to fetch node information later on.
15001 self.needed_locks = {
15002 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15003 locking.LEVEL_NODE: self.op.nodes,
15006 def DeclareLocks(self, level):
15007 if level == locking.LEVEL_NODEGROUP:
15008 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15010 # Try to get all affected nodes' groups without having the group or node
15011 # lock yet. Needs verification later in the code flow.
15012 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15014 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15016 def CheckPrereq(self):
15017 """Check prerequisites.
15020 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15021 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15022 frozenset(self.op.nodes))
15024 expected_locks = (set([self.group_uuid]) |
15025 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15026 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15027 if actual_locks != expected_locks:
15028 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15029 " current groups are '%s', used to be '%s'" %
15030 (utils.CommaJoin(expected_locks),
15031 utils.CommaJoin(actual_locks)))
15033 self.node_data = self.cfg.GetAllNodesInfo()
15034 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15035 instance_data = self.cfg.GetAllInstancesInfo()
15037 if self.group is None:
15038 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15039 (self.op.group_name, self.group_uuid))
15041 (new_splits, previous_splits) = \
15042 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15043 for node in self.op.nodes],
15044 self.node_data, instance_data)
15047 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15049 if not self.op.force:
15050 raise errors.OpExecError("The following instances get split by this"
15051 " change and --force was not given: %s" %
15054 self.LogWarning("This operation will split the following instances: %s",
15057 if previous_splits:
15058 self.LogWarning("In addition, these already-split instances continue"
15059 " to be split across groups: %s",
15060 utils.CommaJoin(utils.NiceSort(previous_splits)))
15062 def Exec(self, feedback_fn):
15063 """Assign nodes to a new group.
15066 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15068 self.cfg.AssignGroupNodes(mods)
15071 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15072 """Check for split instances after a node assignment.
15074 This method considers a series of node assignments as an atomic operation,
15075 and returns information about split instances after applying the set of
15078 In particular, it returns information about newly split instances, and
15079 instances that were already split, and remain so after the change.
15081 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
15084 @type changes: list of (node_name, new_group_uuid) pairs.
15085 @param changes: list of node assignments to consider.
15086 @param node_data: a dict with data for all nodes
15087 @param instance_data: a dict with all instances to consider
15088 @rtype: a two-tuple
15089 @return: a list of instances that were previously okay and become split as a
15090 consequence of this change, and a list of instances that were previously
15091 split and remain split after this change.
15094 changed_nodes = dict((node, group) for node, group in changes
15095 if node_data[node].group != group)
15097 all_split_instances = set()
15098 previously_split_instances = set()
15100 def InstanceNodes(instance):
15101 return [instance.primary_node] + list(instance.secondary_nodes)
15103 for inst in instance_data.values():
15104 if inst.disk_template not in constants.DTS_INT_MIRROR:
15107 instance_nodes = InstanceNodes(inst)
15109 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15110 previously_split_instances.add(inst.name)
15112 if len(set(changed_nodes.get(node, node_data[node].group)
15113 for node in instance_nodes)) > 1:
15114 all_split_instances.add(inst.name)
15116 return (list(all_split_instances - previously_split_instances),
15117 list(previously_split_instances & all_split_instances))
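# Worked example for the split computation above (all names hypothetical):
# if node_data places node1 and node2 in group A and the proposed change is
#   changes = [("node2", "uuid-of-group-B")]
# then a DRBD instance on [node1, node2] that previously sat in one group is
# returned in the first list (newly split), while an instance that already
# spanned two groups and still does afterwards ends up in the second list.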
15120 class _GroupQuery(_QueryBase):
15121 FIELDS = query.GROUP_FIELDS
15123 def ExpandNames(self, lu):
15124 lu.needed_locks = {}
15126 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15127 self._cluster = lu.cfg.GetClusterInfo()
15128 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15131 self.wanted = [name_to_uuid[name]
15132 for name in utils.NiceSort(name_to_uuid.keys())]
15134 # Accept names to be either names or UUIDs.
15137 all_uuid = frozenset(self._all_groups.keys())
15139 for name in self.names:
15140 if name in all_uuid:
15141 self.wanted.append(name)
15142 elif name in name_to_uuid:
15143 self.wanted.append(name_to_uuid[name])
15145 missing.append(name)
15148 raise errors.OpPrereqError("Some groups do not exist: %s" %
15149 utils.CommaJoin(missing),
15150 errors.ECODE_NOENT)
15152 def DeclareLocks(self, lu, level):
15155 def _GetQueryData(self, lu):
15156 """Computes the list of node groups and their attributes.
15159 do_nodes = query.GQ_NODE in self.requested_data
15160 do_instances = query.GQ_INST in self.requested_data
15162 group_to_nodes = None
15163 group_to_instances = None
15165 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15166 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15167 # latter GetAllInstancesInfo() is not enough, for we have to go through
15168 # instance->node. Hence, we will need to process nodes even if we only need
15169 # instance information.
15170 if do_nodes or do_instances:
15171 all_nodes = lu.cfg.GetAllNodesInfo()
15172 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15175 for node in all_nodes.values():
15176 if node.group in group_to_nodes:
15177 group_to_nodes[node.group].append(node.name)
15178 node_to_group[node.name] = node.group
15181 all_instances = lu.cfg.GetAllInstancesInfo()
15182 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15184 for instance in all_instances.values():
15185 node = instance.primary_node
15186 if node in node_to_group:
15187 group_to_instances[node_to_group[node]].append(instance.name)
15190 # Do not pass on node information if it was not requested.
15191 group_to_nodes = None
15193 return query.GroupQueryData(self._cluster,
15194 [self._all_groups[uuid]
15195 for uuid in self.wanted],
15196 group_to_nodes, group_to_instances,
15197 query.GQ_DISKPARAMS in self.requested_data)
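# Sketch of the intermediate mappings built above (hypothetical values):
#   group_to_nodes     = {"<group-uuid>": ["node1", "node2"]}
#   group_to_instances = {"<group-uuid>": ["inst1.example.com"]}
# They are only computed when GQ_NODE/GQ_INST data was requested; otherwise
# they remain None and GroupQueryData receives None for them.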
15200 class LUGroupQuery(NoHooksLU):
15201 """Logical unit for querying node groups.
15206 def CheckArguments(self):
15207 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15208 self.op.output_fields, False)
15210 def ExpandNames(self):
15211 self.gq.ExpandNames(self)
15213 def DeclareLocks(self, level):
15214 self.gq.DeclareLocks(self, level)
15216 def Exec(self, feedback_fn):
15217 return self.gq.OldStyleQuery(self)
15220 class LUGroupSetParams(LogicalUnit):
15221 """Modifies the parameters of a node group.
15224 HPATH = "group-modify"
15225 HTYPE = constants.HTYPE_GROUP
15228 def CheckArguments(self):
15231 self.op.diskparams,
15232 self.op.alloc_policy,
15234 self.op.disk_state,
15238 if all_changes.count(None) == len(all_changes):
15239 raise errors.OpPrereqError("Please pass at least one modification",
15240 errors.ECODE_INVAL)
15242 def ExpandNames(self):
15243 # This raises errors.OpPrereqError on its own:
15244 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15246 self.needed_locks = {
15247 locking.LEVEL_INSTANCE: [],
15248 locking.LEVEL_NODEGROUP: [self.group_uuid],
15251 self.share_locks[locking.LEVEL_INSTANCE] = 1
15253 def DeclareLocks(self, level):
15254 if level == locking.LEVEL_INSTANCE:
15255 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15257 # Lock instances optimistically, needs verification once group lock has
15259 self.needed_locks[locking.LEVEL_INSTANCE] = \
15260 self.cfg.GetNodeGroupInstances(self.group_uuid)
15263 def _UpdateAndVerifyDiskParams(old, new):
15264 """Updates and verifies disk parameters.
15267 new_params = _GetUpdatedParams(old, new)
15268 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15269 return new_params
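# Illustrative use of the helper above (parameter values are hypothetical):
#   new = _UpdateAndVerifyDiskParams({"resync-rate": 1000},
#                                    {"resync-rate": 2000})
# merges the override into the existing per-template dict and type-checks the
# result against constants.DISK_DT_TYPES before it is used in CheckPrereq().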
15271 def CheckPrereq(self):
15272 """Check prerequisites.
15275 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15277 # Check if locked instances are still correct
15278 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15280 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15281 cluster = self.cfg.GetClusterInfo()
15283 if self.group is None:
15284 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15285 (self.op.group_name, self.group_uuid))
15287 if self.op.ndparams:
15288 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15289 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15290 self.new_ndparams = new_ndparams
15292 if self.op.diskparams:
15293 diskparams = self.group.diskparams
15294 uavdp = self._UpdateAndVerifyDiskParams
15295 # For each disktemplate subdict update and verify the values
15296 new_diskparams = dict((dt,
15297 uavdp(diskparams.get(dt, {}),
15298 self.op.diskparams[dt]))
15299 for dt in constants.DISK_TEMPLATES
15300 if dt in self.op.diskparams)
15301 # Now that we have all subdicts of diskparams ready, let's merge the
15302 # actual dict with all updated subdicts
15303 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15305 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15306 except errors.OpPrereqError, err:
15307 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15308 errors.ECODE_INVAL)
15310 if self.op.hv_state:
15311 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15312 self.group.hv_state_static)
15314 if self.op.disk_state:
15315 self.new_disk_state = \
15316 _MergeAndVerifyDiskState(self.op.disk_state,
15317 self.group.disk_state_static)
15319 if self.op.ipolicy:
15320 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15324 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15325 inst_filter = lambda inst: inst.name in owned_instances
15326 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15327 gmi = ganeti.masterd.instance
15329 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15331 new_ipolicy, instances)
15334 self.LogWarning("After the ipolicy change the following instances"
15335 " violate them: %s",
15336 utils.CommaJoin(violations))
15338 def BuildHooksEnv(self):
15339 """Build hooks env.
15343 "GROUP_NAME": self.op.group_name,
15344 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15347 def BuildHooksNodes(self):
15348 """Build hooks nodes.
15351 mn = self.cfg.GetMasterNode()
15352 return ([mn], [mn])
15354 def Exec(self, feedback_fn):
15355 """Modifies the node group.
15360 if self.op.ndparams:
15361 self.group.ndparams = self.new_ndparams
15362 result.append(("ndparams", str(self.group.ndparams)))
15364 if self.op.diskparams:
15365 self.group.diskparams = self.new_diskparams
15366 result.append(("diskparams", str(self.group.diskparams)))
15368 if self.op.alloc_policy:
15369 self.group.alloc_policy = self.op.alloc_policy
15371 if self.op.hv_state:
15372 self.group.hv_state_static = self.new_hv_state
15374 if self.op.disk_state:
15375 self.group.disk_state_static = self.new_disk_state
15377 if self.op.ipolicy:
15378 self.group.ipolicy = self.new_ipolicy
15380 self.cfg.Update(self.group, feedback_fn)
15384 class LUGroupRemove(LogicalUnit):
15385 HPATH = "group-remove"
15386 HTYPE = constants.HTYPE_GROUP
15389 def ExpandNames(self):
15390 # This raises errors.OpPrereqError on its own:
15391 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15392 self.needed_locks = {
15393 locking.LEVEL_NODEGROUP: [self.group_uuid],
15396 def CheckPrereq(self):
15397 """Check prerequisites.
15399 This checks that the given group name exists as a node group, that it is
15400 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
15404 # Verify that the group is empty.
15405 group_nodes = [node.name
15406 for node in self.cfg.GetAllNodesInfo().values()
15407 if node.group == self.group_uuid]
15410 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15412 (self.op.group_name,
15413 utils.CommaJoin(utils.NiceSort(group_nodes))),
15414 errors.ECODE_STATE)
15416 # Verify the cluster would not be left group-less.
15417 if len(self.cfg.GetNodeGroupList()) == 1:
15418 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15419 " removed" % self.op.group_name,
15420 errors.ECODE_STATE)
15422 def BuildHooksEnv(self):
15423 """Build hooks env.
15427 "GROUP_NAME": self.op.group_name,
15430 def BuildHooksNodes(self):
15431 """Build hooks nodes.
15434 mn = self.cfg.GetMasterNode()
15435 return ([mn], [mn])
15437 def Exec(self, feedback_fn):
15438 """Remove the node group.
15442 self.cfg.RemoveNodeGroup(self.group_uuid)
15443 except errors.ConfigurationError:
15444 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15445 (self.op.group_name, self.group_uuid))
15447 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15450 class LUGroupRename(LogicalUnit):
15451 HPATH = "group-rename"
15452 HTYPE = constants.HTYPE_GROUP
15455 def ExpandNames(self):
15456 # This raises errors.OpPrereqError on its own:
15457 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15459 self.needed_locks = {
15460 locking.LEVEL_NODEGROUP: [self.group_uuid],
15463 def CheckPrereq(self):
15464 """Check prerequisites.
15466 Ensures requested new name is not yet used.
15470 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15471 except errors.OpPrereqError:
15474 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15475 " node group (UUID: %s)" %
15476 (self.op.new_name, new_name_uuid),
15477 errors.ECODE_EXISTS)
15479 def BuildHooksEnv(self):
15480 """Build hooks env.
15484 "OLD_NAME": self.op.group_name,
15485 "NEW_NAME": self.op.new_name,
15488 def BuildHooksNodes(self):
15489 """Build hooks nodes.
15492 mn = self.cfg.GetMasterNode()
15494 all_nodes = self.cfg.GetAllNodesInfo()
15495 all_nodes.pop(mn, None)
15498 run_nodes.extend(node.name for node in all_nodes.values()
15499 if node.group == self.group_uuid)
15501 return (run_nodes, run_nodes)
15503 def Exec(self, feedback_fn):
15504 """Rename the node group.
15507 group = self.cfg.GetNodeGroup(self.group_uuid)
15510 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15511 (self.op.group_name, self.group_uuid))
15513 group.name = self.op.new_name
15514 self.cfg.Update(group, feedback_fn)
15516 return self.op.new_name
15519 class LUGroupEvacuate(LogicalUnit):
15520 HPATH = "group-evacuate"
15521 HTYPE = constants.HTYPE_GROUP
15524 def ExpandNames(self):
15525 # This raises errors.OpPrereqError on its own:
15526 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15528 if self.op.target_groups:
15529 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15530 self.op.target_groups)
15532 self.req_target_uuids = []
15534 if self.group_uuid in self.req_target_uuids:
15535 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15536 " as a target group (targets are %s)" %
15538 utils.CommaJoin(self.req_target_uuids)),
15539 errors.ECODE_INVAL)
15541 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15543 self.share_locks = _ShareAll()
15544 self.needed_locks = {
15545 locking.LEVEL_INSTANCE: [],
15546 locking.LEVEL_NODEGROUP: [],
15547 locking.LEVEL_NODE: [],
15550 def DeclareLocks(self, level):
15551 if level == locking.LEVEL_INSTANCE:
15552 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15554 # Lock instances optimistically, needs verification once node and group
15555 # locks have been acquired
15556 self.needed_locks[locking.LEVEL_INSTANCE] = \
15557 self.cfg.GetNodeGroupInstances(self.group_uuid)
15559 elif level == locking.LEVEL_NODEGROUP:
15560 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15562 if self.req_target_uuids:
15563 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15565 # Lock all groups used by instances optimistically; this requires going
15566 # via the node before it's locked, requiring verification later on
15567 lock_groups.update(group_uuid
15568 for instance_name in
15569 self.owned_locks(locking.LEVEL_INSTANCE)
15571 self.cfg.GetInstanceNodeGroups(instance_name))
15573 # No target groups, need to lock all of them
15574 lock_groups = locking.ALL_SET
15576 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15578 elif level == locking.LEVEL_NODE:
15579 # This will only lock the nodes in the group to be evacuated which
15580 # contain actual instances
15581 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15582 self._LockInstancesNodes()
15584 # Lock all nodes in group to be evacuated and target groups
15585 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15586 assert self.group_uuid in owned_groups
15587 member_nodes = [node_name
15588 for group in owned_groups
15589 for node_name in self.cfg.GetNodeGroup(group).members]
15590 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15592 def CheckPrereq(self):
15593 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15594 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15595 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15597 assert owned_groups.issuperset(self.req_target_uuids)
15598 assert self.group_uuid in owned_groups
15600 # Check if locked instances are still correct
15601 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15603 # Get instance information
15604 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15606 # Check if node groups for locked instances are still correct
15607 _CheckInstancesNodeGroups(self.cfg, self.instances,
15608 owned_groups, owned_nodes, self.group_uuid)
15610 if self.req_target_uuids:
15611 # User requested specific target groups
15612 self.target_uuids = self.req_target_uuids
15614 # All groups except the one to be evacuated are potential targets
15615 self.target_uuids = [group_uuid for group_uuid in owned_groups
15616 if group_uuid != self.group_uuid]
15618 if not self.target_uuids:
15619 raise errors.OpPrereqError("There are no possible target groups",
15620 errors.ECODE_INVAL)
15622 def BuildHooksEnv(self):
15623 """Build hooks env.
15627 "GROUP_NAME": self.op.group_name,
15628 "TARGET_GROUPS": " ".join(self.target_uuids),
15631 def BuildHooksNodes(self):
15632 """Build hooks nodes.
15635 mn = self.cfg.GetMasterNode()
15637 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15639 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15641 return (run_nodes, run_nodes)
15643 def Exec(self, feedback_fn):
15644 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15646 assert self.group_uuid not in self.target_uuids
15648 req = iallocator.IAReqGroupChange(instances=instances,
15649 target_groups=self.target_uuids)
15650 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15652 ial.Run(self.op.iallocator)
15654 if not ial.success:
15655 raise errors.OpPrereqError("Can't compute group evacuation using"
15656 " iallocator '%s': %s" %
15657 (self.op.iallocator, ial.info),
15658 errors.ECODE_NORES)
15660 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15662 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15663 len(jobs), self.op.group_name)
15665 return ResultWithJobs(jobs)
15668 class TagsLU(NoHooksLU): # pylint: disable=W0223
15669 """Generic tags LU.
15671 This is an abstract class which is the parent of all the other tags LUs.
15674 def ExpandNames(self):
15675 self.group_uuid = None
15676 self.needed_locks = {}
15678 if self.op.kind == constants.TAG_NODE:
15679 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15680 lock_level = locking.LEVEL_NODE
15681 lock_name = self.op.name
15682 elif self.op.kind == constants.TAG_INSTANCE:
15683 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15684 lock_level = locking.LEVEL_INSTANCE
15685 lock_name = self.op.name
15686 elif self.op.kind == constants.TAG_NODEGROUP:
15687 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15688 lock_level = locking.LEVEL_NODEGROUP
15689 lock_name = self.group_uuid
15690 elif self.op.kind == constants.TAG_NETWORK:
15691 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15692 lock_level = locking.LEVEL_NETWORK
15693 lock_name = self.network_uuid
15698 if lock_level and getattr(self.op, "use_locking", True):
15699 self.needed_locks[lock_level] = lock_name
15701 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15702 # not possible to acquire the BGL based on opcode parameters)
15704 def CheckPrereq(self):
15705 """Check prerequisites.
15708 if self.op.kind == constants.TAG_CLUSTER:
15709 self.target = self.cfg.GetClusterInfo()
15710 elif self.op.kind == constants.TAG_NODE:
15711 self.target = self.cfg.GetNodeInfo(self.op.name)
15712 elif self.op.kind == constants.TAG_INSTANCE:
15713 self.target = self.cfg.GetInstanceInfo(self.op.name)
15714 elif self.op.kind == constants.TAG_NODEGROUP:
15715 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15716 elif self.op.kind == constants.TAG_NETWORK:
15717 self.target = self.cfg.GetNetwork(self.network_uuid)
15719 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15720 str(self.op.kind), errors.ECODE_INVAL)
15723 class LUTagsGet(TagsLU):
15724 """Returns the tags of a given object.
15729 def ExpandNames(self):
15730 TagsLU.ExpandNames(self)
15732 # Share locks as this is only a read operation
15733 self.share_locks = _ShareAll()
15735 def Exec(self, feedback_fn):
15736 """Returns the tag list.
15739 return list(self.target.GetTags())
15742 class LUTagsSearch(NoHooksLU):
15743 """Searches the tags for a given pattern.
15748 def ExpandNames(self):
15749 self.needed_locks = {}
15751 def CheckPrereq(self):
15752 """Check prerequisites.
15754 This checks the pattern passed for validity by compiling it.
15758 self.re = re.compile(self.op.pattern)
15759 except re.error, err:
15760 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15761 (self.op.pattern, err), errors.ECODE_INVAL)
15763 def Exec(self, feedback_fn):
15764 """Returns the tag list.
15768 tgts = [("/cluster", cfg.GetClusterInfo())]
15769 ilist = cfg.GetAllInstancesInfo().values()
15770 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15771 nlist = cfg.GetAllNodesInfo().values()
15772 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15773 tgts.extend(("/nodegroup/%s" % n.name, n)
15774 for n in cfg.GetAllNodeGroupsInfo().values())
15776 for path, target in tgts:
15777 for tag in target.GetTags():
15778 if self.re.search(tag):
15779 results.append((path, tag))
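# Example of the search output built above (object names and tags are
# hypothetical): each match is a (path, tag) pair whose path encodes the
# object type, e.g.
#   [("/cluster", "backup"), ("/instances/inst1.example.com", "backup")]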
15783 class LUTagsSet(TagsLU):
15784 """Sets a tag on a given object.
15789 def CheckPrereq(self):
15790 """Check prerequisites.
15792 This checks the type and length of the tag name and value.
15795 TagsLU.CheckPrereq(self)
15796 for tag in self.op.tags:
15797 objects.TaggableObject.ValidateTag(tag)
15799 def Exec(self, feedback_fn):
15804 for tag in self.op.tags:
15805 self.target.AddTag(tag)
15806 except errors.TagError, err:
15807 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15808 self.cfg.Update(self.target, feedback_fn)
15811 class LUTagsDel(TagsLU):
15812 """Delete a list of tags from a given object.
15817 def CheckPrereq(self):
15818 """Check prerequisites.
15820 This checks that we have the given tag.
15823 TagsLU.CheckPrereq(self)
15824 for tag in self.op.tags:
15825 objects.TaggableObject.ValidateTag(tag)
15826 del_tags = frozenset(self.op.tags)
15827 cur_tags = self.target.GetTags()
15829 diff_tags = del_tags - cur_tags
15831 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15832 raise errors.OpPrereqError("Tag(s) %s not found" %
15833 (utils.CommaJoin(diff_names), ),
15834 errors.ECODE_NOENT)
15836 def Exec(self, feedback_fn):
15837 """Remove the tag from the object.
15840 for tag in self.op.tags:
15841 self.target.RemoveTag(tag)
15842 self.cfg.Update(self.target, feedback_fn)
15845 class LUTestDelay(NoHooksLU):
15846 """Sleep for a specified amount of time.
15848 This LU sleeps on the master and/or nodes for a specified amount of time.
15854 def ExpandNames(self):
15855 """Expand names and set required locks.
15857 This expands the node list, if any.
15860 self.needed_locks = {}
15861 if self.op.on_nodes:
15862 # _GetWantedNodes can be used here, but is not always appropriate to use
15863 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15864 # more information.
15865 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15866 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15868 def _TestDelay(self):
15869 """Do the actual sleep.
15872 if self.op.on_master:
15873 if not utils.TestDelay(self.op.duration):
15874 raise errors.OpExecError("Error during master delay test")
15875 if self.op.on_nodes:
15876 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15877 for node, node_result in result.items():
15878 node_result.Raise("Failure during rpc call to node %s" % node)
15880 def Exec(self, feedback_fn):
15881 """Execute the test delay opcode, with the wanted repetitions.
15884 if self.op.repeat == 0:
15887 top_value = self.op.repeat - 1
15888 for i in range(self.op.repeat):
15889 self.LogInfo("Test delay iteration %d/%d", i, top_value)
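# Descriptive note on the loop above: with a non-zero repeat count the delay
# is executed self.op.repeat times and every iteration is reported against
# top_value = self.op.repeat - 1 (i.e. "0/2", "1/2", "2/2" for three runs).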
15893 class LURestrictedCommand(NoHooksLU):
15894 """Logical unit for executing restricted commands.
15899 def ExpandNames(self):
15901 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15903 self.needed_locks = {
15904 locking.LEVEL_NODE: self.op.nodes,
15906 self.share_locks = {
15907 locking.LEVEL_NODE: not self.op.use_locking,
15910 def CheckPrereq(self):
15911 """Check prerequisites.
15915 def Exec(self, feedback_fn):
15916 """Execute restricted command and return output.
15919 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15921 # Check if correct locks are held
15922 assert set(self.op.nodes).issubset(owned_nodes)
15924 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15928 for node_name in self.op.nodes:
15929 nres = rpcres[node_name]
15931 msg = ("Command '%s' on node '%s' failed: %s" %
15932 (self.op.command, node_name, nres.fail_msg))
15933 result.append((False, msg))
15935 result.append((True, nres.payload))
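# Shape of the result assembled above (sketch, hypothetical command and node
# names): one (success, payload_or_message) tuple per requested node, e.g.
#   [(True, "command output"),
#    (False, "Command 'uptime' on node 'node2' failed: ...")]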
15940 class LUTestJqueue(NoHooksLU):
15941 """Utility LU to test some aspects of the job queue.
15946 # Must be lower than default timeout for WaitForJobChange to see whether it
15947 # notices changed jobs
15948 _CLIENT_CONNECT_TIMEOUT = 20.0
15949 _CLIENT_CONFIRM_TIMEOUT = 60.0
15952 def _NotifyUsingSocket(cls, cb, errcls):
15953 """Opens a Unix socket and waits for another program to connect.
15956 @param cb: Callback to send socket name to client
15957 @type errcls: class
15958 @param errcls: Exception class to use for errors
15961 # Using a temporary directory as there's no easy way to create temporary
15962 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
15964 tmpdir = tempfile.mkdtemp()
15966 tmpsock = utils.PathJoin(tmpdir, "sock")
15968 logging.debug("Creating temporary socket at %s", tmpsock)
15969 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15974 # Send details to client
15977 # Wait for client to connect before continuing
15978 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15980 (conn, _) = sock.accept()
15981 except socket.error, err:
15982 raise errcls("Client didn't connect in time (%s)" % err)
15986 # Remove as soon as client is connected
15987 shutil.rmtree(tmpdir)
15989 # Wait for client to close
15992 # pylint: disable=E1101
15993 # Instance of '_socketobject' has no ... member
15994 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15996 except socket.error, err:
15997 raise errcls("Client failed to confirm notification (%s)" % err)
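# Handshake summary for the helper above (descriptive only): a socket path in
# a fresh temporary directory is passed to the client through cb(), the LU
# waits for the client to connect (up to _CLIENT_CONNECT_TIMEOUT), removes
# the directory once connected, and then waits for the client to close the
# connection (up to _CLIENT_CONFIRM_TIMEOUT), raising errcls on timeouts.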
16001 def _SendNotification(self, test, arg, sockname):
16002 """Sends a notification to the client.
16005 @param test: Test name
16006 @param arg: Test argument (depends on test)
16007 @type sockname: string
16008 @param sockname: Socket path
16011 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16013 def _Notify(self, prereq, test, arg):
16014 """Notifies the client of a test.
16017 @param prereq: Whether this is a prereq-phase test
16019 @param test: Test name
16020 @param arg: Test argument (depends on test)
16024 errcls = errors.OpPrereqError
16026 errcls = errors.OpExecError
16028 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16032 def CheckArguments(self):
16033 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16034 self.expandnames_calls = 0
16036 def ExpandNames(self):
16037 checkargs_calls = getattr(self, "checkargs_calls", 0)
16038 if checkargs_calls < 1:
16039 raise errors.ProgrammerError("CheckArguments was not called")
16041 self.expandnames_calls += 1
16043 if self.op.notify_waitlock:
16044 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16046 self.LogInfo("Expanding names")
16048 # Get lock on master node (just to get a lock, not for a particular reason)
16049 self.needed_locks = {
16050 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16053 def Exec(self, feedback_fn):
16054 if self.expandnames_calls < 1:
16055 raise errors.ProgrammerError("ExpandNames was not called")
16057 if self.op.notify_exec:
16058 self._Notify(False, constants.JQT_EXEC, None)
16060 self.LogInfo("Executing")
16062 if self.op.log_messages:
16063 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16064 for idx, msg in enumerate(self.op.log_messages):
16065 self.LogInfo("Sending log message %s", idx + 1)
16066 feedback_fn(constants.JQT_MSGPREFIX + msg)
16067 # Report how many test messages have been sent
16068 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16071 raise errors.OpExecError("Opcode failure was requested")
16076 class LUTestAllocator(NoHooksLU):
16077 """Run allocator tests.
16079 This LU runs the allocator tests
16082 def CheckPrereq(self):
16083 """Check prerequisites.
16085 This checks the opcode parameters depending on the director and mode test.
16088 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16089 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16090 for attr in ["memory", "disks", "disk_template",
16091 "os", "tags", "nics", "vcpus"]:
16092 if not hasattr(self.op, attr):
16093 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16094 attr, errors.ECODE_INVAL)
16095 iname = self.cfg.ExpandInstanceName(self.op.name)
16096 if iname is not None:
16097 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16098 iname, errors.ECODE_EXISTS)
16099 if not isinstance(self.op.nics, list):
16100 raise errors.OpPrereqError("Invalid parameter 'nics'",
16101 errors.ECODE_INVAL)
16102 if not isinstance(self.op.disks, list):
16103 raise errors.OpPrereqError("Invalid parameter 'disks'",
16104 errors.ECODE_INVAL)
16105 for row in self.op.disks:
16106 if (not isinstance(row, dict) or
16107 constants.IDISK_SIZE not in row or
16108 not isinstance(row[constants.IDISK_SIZE], int) or
16109 constants.IDISK_MODE not in row or
16110 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16111 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16112 " parameter", errors.ECODE_INVAL)
16113 if self.op.hypervisor is None:
16114 self.op.hypervisor = self.cfg.GetHypervisorType()
16115 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16116 fname = _ExpandInstanceName(self.cfg, self.op.name)
16117 self.op.name = fname
16118 self.relocate_from = \
16119 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16120 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16121 constants.IALLOCATOR_MODE_NODE_EVAC):
16122 if not self.op.instances:
16123 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16124 self.op.instances = _GetWantedInstances(self, self.op.instances)
16126 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16127 self.op.mode, errors.ECODE_INVAL)
16129 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16130 if self.op.iallocator is None:
16131 raise errors.OpPrereqError("Missing allocator name",
16132 errors.ECODE_INVAL)
16133 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16134 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16135 self.op.direction, errors.ECODE_INVAL)
16137 def Exec(self, feedback_fn):
16138 """Run the allocator test.
16141 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16142 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16143 memory=self.op.memory,
16144 disks=self.op.disks,
16145 disk_template=self.op.disk_template,
16149 vcpus=self.op.vcpus,
16150 spindle_use=self.op.spindle_use,
16151 hypervisor=self.op.hypervisor,
16152 node_whitelist=None)
16153 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16154 req = iallocator.IAReqRelocate(name=self.op.name,
16155 relocate_from=list(self.relocate_from))
16156 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16157 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16158 target_groups=self.op.target_groups)
16159 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16160 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16161 evac_mode=self.op.evac_mode)
16162 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16163 disk_template = self.op.disk_template
16164 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16165 memory=self.op.memory,
16166 disks=self.op.disks,
16167 disk_template=disk_template,
16171 vcpus=self.op.vcpus,
16172 spindle_use=self.op.spindle_use,
16173 hypervisor=self.op.hypervisor)
16174 for idx in range(self.op.count)]
16175 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16177 raise errors.ProgrammerError("Unhandled mode %s in"
16178 " LUTestAllocator.Exec", self.op.mode)
16180 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16181 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16182 result = ial.in_text
16184 ial.Run(self.op.iallocator, validate=False)
16185 result = ial.out_text
16189 class LUNetworkAdd(LogicalUnit):
16190 """Logical unit for creating networks.
16193 HPATH = "network-add"
16194 HTYPE = constants.HTYPE_NETWORK
16197 def BuildHooksNodes(self):
16198 """Build hooks nodes.
16201 mn = self.cfg.GetMasterNode()
16202 return ([mn], [mn])
16204 def CheckArguments(self):
16205 if self.op.mac_prefix:
16206 self.op.mac_prefix = \
16207 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16209 def ExpandNames(self):
16210 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16212 if self.op.conflicts_check:
16213 self.share_locks[locking.LEVEL_NODE] = 1
16214 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16215 self.needed_locks = {
16216 locking.LEVEL_NODE: locking.ALL_SET,
16217 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16220 self.needed_locks = {}
16222 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16224 def CheckPrereq(self):
16225 if self.op.network is None:
16226 raise errors.OpPrereqError("Network must be given",
16227 errors.ECODE_INVAL)
16230 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16231 except errors.OpPrereqError:
16234 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16235 " network (UUID: %s)" %
16236 (self.op.network_name, existing_uuid),
16237 errors.ECODE_EXISTS)
16239 # Check tag validity
16240 for tag in self.op.tags:
16241 objects.TaggableObject.ValidateTag(tag)
16243 def BuildHooksEnv(self):
16244 """Build hooks env.
16248 "name": self.op.network_name,
16249 "subnet": self.op.network,
16250 "gateway": self.op.gateway,
16251 "network6": self.op.network6,
16252 "gateway6": self.op.gateway6,
16253 "mac_prefix": self.op.mac_prefix,
16254 "tags": self.op.tags,
16256 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16258 def Exec(self, feedback_fn):
16259 """Add the ip pool to the cluster.
16262 nobj = objects.Network(name=self.op.network_name,
16263 network=self.op.network,
16264 gateway=self.op.gateway,
16265 network6=self.op.network6,
16266 gateway6=self.op.gateway6,
16267 mac_prefix=self.op.mac_prefix,
16268 uuid=self.network_uuid)
16269 # Initialize the associated address pool
16271 pool = network.AddressPool.InitializeNetwork(nobj)
16272 except errors.AddressPoolError, err:
16273 raise errors.OpExecError("Cannot create IP address pool for network"
16274 " '%s': %s" % (self.op.network_name, err))
16276 # Check if we need to reserve the nodes and the cluster master IP
16277 # These may not be allocated to any instances in routed mode, as
16278 # they wouldn't function anyway.
16279 if self.op.conflicts_check:
16280 for node in self.cfg.GetAllNodesInfo().values():
16281 for ip in [node.primary_ip, node.secondary_ip]:
16283 if pool.Contains(ip):
16285 self.LogInfo("Reserved IP address of node '%s' (%s)",
16287 except errors.AddressPoolError, err:
16288 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16289 ip, node.name, err)
16291 master_ip = self.cfg.GetClusterInfo().master_ip
16293 if pool.Contains(master_ip):
16294 pool.Reserve(master_ip)
16295 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16296 except errors.AddressPoolError, err:
16297 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16300 if self.op.add_reserved_ips:
16301 for ip in self.op.add_reserved_ips:
16303 pool.Reserve(ip, external=True)
16304 except errors.AddressPoolError, err:
16305 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16309 for tag in self.op.tags:
16312 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16313 del self.remove_locks[locking.LEVEL_NETWORK]
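# Reservation behaviour of Exec() above (summary): with conflicts_check set,
# node primary/secondary IPs inside the new pool are reserved (failures are
# only logged as warnings), the cluster master IP is reserved if contained,
# and add_reserved_ips entries are reserved as external addresses, where a
# failure aborts the operation with OpExecError.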
16316 class LUNetworkRemove(LogicalUnit):
16317 HPATH = "network-remove"
16318 HTYPE = constants.HTYPE_NETWORK
16321 def ExpandNames(self):
16322 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16324 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16325 self.needed_locks = {
16326 locking.LEVEL_NETWORK: [self.network_uuid],
16327 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16330 def CheckPrereq(self):
16331 """Check prerequisites.
16333 This checks that the given network name exists as a network, that is
16334 empty (i.e., contains no nodes), and that is not the last group of the
16338 # Verify that the network is not conncted.
16339 node_groups = [group.name
16340 for group in self.cfg.GetAllNodeGroupsInfo().values()
16341 if self.network_uuid in group.networks]
16343 if node_groups:
16344 self.LogWarning("Network '%s' is connected to the following"
16345 " node groups: %s" %
16346 (self.op.network_name,
16347 utils.CommaJoin(utils.NiceSort(node_groups))))
16348 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16350 def BuildHooksEnv(self):
16351 """Build hooks env.
16355 "NETWORK_NAME": self.op.network_name,
16358 def BuildHooksNodes(self):
16359 """Build hooks nodes.
16362 mn = self.cfg.GetMasterNode()
16363 return ([mn], [mn])
16365 def Exec(self, feedback_fn):
16366 """Remove the network.
16370 self.cfg.RemoveNetwork(self.network_uuid)
16371 except errors.ConfigurationError:
16372 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16373 (self.op.network_name, self.network_uuid))
16376 class LUNetworkSetParams(LogicalUnit):
16377 """Modifies the parameters of a network.
16380 HPATH = "network-modify"
16381 HTYPE = constants.HTYPE_NETWORK
16384 def CheckArguments(self):
16385 if (self.op.gateway and
16386 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16387 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16388 " at once", errors.ECODE_INVAL)
16390 def ExpandNames(self):
16391 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16393 self.needed_locks = {
16394 locking.LEVEL_NETWORK: [self.network_uuid],
16395 }
16397 def CheckPrereq(self):
16398 """Check prerequisites.
16401 self.network = self.cfg.GetNetwork(self.network_uuid)
16402 self.gateway = self.network.gateway
16403 self.mac_prefix = self.network.mac_prefix
16404 self.network6 = self.network.network6
16405 self.gateway6 = self.network.gateway6
16406 self.tags = self.network.tags
16408 self.pool = network.AddressPool(self.network)
16410 if self.op.gateway:
16411 if self.op.gateway == constants.VALUE_NONE:
16412 self.gateway = None
16413 else:
16414 self.gateway = self.op.gateway
16415 if self.pool.IsReserved(self.gateway):
16416 raise errors.OpPrereqError("Gateway IP address '%s' is already"
16417 " reserved" % self.gateway,
16418 errors.ECODE_STATE)
16420 if self.op.mac_prefix:
16421 if self.op.mac_prefix == constants.VALUE_NONE:
16422 self.mac_prefix = None
16423 else:
16424 self.mac_prefix = \
16425 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16427 if self.op.gateway6:
16428 if self.op.gateway6 == constants.VALUE_NONE:
16429 self.gateway6 = None
16430 else:
16431 self.gateway6 = self.op.gateway6
16433 if self.op.network6:
16434 if self.op.network6 == constants.VALUE_NONE:
16435 self.network6 = None
16436 else:
16437 self.network6 = self.op.network6
16439 def BuildHooksEnv(self):
16440 """Build hooks env.
16444 "name": self.op.network_name,
16445 "subnet": self.network.network,
16446 "gateway": self.gateway,
16447 "network6": self.network6,
16448 "gateway6": self.gateway6,
16449 "mac_prefix": self.mac_prefix,
16452 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16454 def BuildHooksNodes(self):
16455 """Build hooks nodes.
16458 mn = self.cfg.GetMasterNode()
16459 return ([mn], [mn])
16461 def Exec(self, feedback_fn):
16462 """Modifies the network.
16465 #TODO: reserve/release via temporary reservation manager
16466 # extend cfg.ReserveIp/ReleaseIp with the external flag
16467 if self.op.gateway:
16468 if self.gateway == self.network.gateway:
16469 self.LogWarning("Gateway is already %s", self.gateway)
16470 else:
16471 if self.gateway:
16472 self.pool.Reserve(self.gateway, external=True)
16473 if self.network.gateway:
16474 self.pool.Release(self.network.gateway, external=True)
16475 self.network.gateway = self.gateway
16477 if self.op.add_reserved_ips:
16478 for ip in self.op.add_reserved_ips:
16479 try:
16480 if self.pool.IsReserved(ip):
16481 self.LogWarning("IP address %s is already reserved", ip)
16482 else:
16483 self.pool.Reserve(ip, external=True)
16484 except errors.AddressPoolError, err:
16485 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16487 if self.op.remove_reserved_ips:
16488 for ip in self.op.remove_reserved_ips:
16489 if ip == self.network.gateway:
16490 self.LogWarning("Cannot unreserve the gateway's IP address")
16491 continue
16492 try:
16493 if not self.pool.IsReserved(ip):
16494 self.LogWarning("IP address %s is already unreserved", ip)
16495 else:
16496 self.pool.Release(ip, external=True)
16497 except errors.AddressPoolError, err:
16498 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16500 if self.op.mac_prefix:
16501 self.network.mac_prefix = self.mac_prefix
16503 if self.op.network6:
16504 self.network.network6 = self.network6
16506 if self.op.gateway6:
16507 self.network.gateway6 = self.gateway6
16509 self.pool.Validate()
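# pool.Validate() above is assumed to sanity-check the reservations against
# the (possibly changed) network parameters before the modified network
# object is written back to the configuration below.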
16511 self.cfg.Update(self.network, feedback_fn)
16514 class _NetworkQuery(_QueryBase):
16515 FIELDS = query.NETWORK_FIELDS
16517 def ExpandNames(self, lu):
16518 lu.needed_locks = {}
16519 lu.share_locks = _ShareAll()
16521 self.do_locking = self.use_locking
16523 all_networks = lu.cfg.GetAllNetworksInfo()
16524 name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16526 if self.names:
16527 missing = []
16528 self.wanted = []
16530 for name in self.names:
16531 if name in name_to_uuid:
16532 self.wanted.append(name_to_uuid[name])
16533 else:
16534 missing.append(name)
16536 if missing:
16537 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16538 errors.ECODE_NOENT)
16539 else:
16540 self.wanted = locking.ALL_SET
16542 if self.do_locking:
16543 lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16544 if query.NETQ_INST in self.requested_data:
16545 lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16546 if query.NETQ_GROUP in self.requested_data:
16547 lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
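# Instance and node group locks are only needed when the query requests the
# fields mapping networks to instances (NETQ_INST) or to node groups
# (NETQ_GROUP); plain network queries only take the network locks.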
16549 def DeclareLocks(self, lu, level):
16550 pass
16552 def _GetQueryData(self, lu):
16553 """Computes the list of networks and their attributes.
16556 all_networks = lu.cfg.GetAllNetworksInfo()
16558 network_uuids = self._GetNames(lu, all_networks.keys(),
16559 locking.LEVEL_NETWORK)
16561 do_instances = query.NETQ_INST in self.requested_data
16562 do_groups = query.NETQ_GROUP in self.requested_data
16564 network_to_instances = None
16565 network_to_groups = None
16567 # For NETQ_GROUP, we need to map network->[groups]
16568 if do_groups:
16569 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16570 network_to_groups = dict((uuid, []) for uuid in network_uuids)
16571 for _, group in all_groups.iteritems():
16572 for net_uuid in network_uuids:
16573 netparams = group.networks.get(net_uuid, None)
16574 if netparams:
16575 info = (group.name, netparams[constants.NIC_MODE],
16576 netparams[constants.NIC_LINK])
16578 network_to_groups[net_uuid].append(info)
16580 if do_instances:
16581 all_instances = lu.cfg.GetAllInstancesInfo()
16582 network_to_instances = dict((uuid, []) for uuid in network_uuids)
16583 for instance in all_instances.values():
16584 for nic in instance.nics:
16585 if nic.network in network_uuids:
16586 network_to_instances[nic.network].append(instance.name)
16589 if query.NETQ_STATS in self.requested_data:
16590 stats = \
16591 dict((uuid,
16592 self._GetStats(network.AddressPool(all_networks[uuid])))
16593 for uuid in network_uuids)
16594 else:
16595 stats = None
16597 return query.NetworkQueryData([all_networks[uuid]
16598 for uuid in network_uuids],
16599 network_to_groups,
16600 network_to_instances,
16601 stats)
16603 @staticmethod
16604 def _GetStats(pool):
16605 """Returns statistics for a network address pool.
16609 "free_count": pool.GetFreeCount(),
16610 "reserved_count": pool.GetReservedCount(),
16611 "map": pool.GetMap(),
16612 "external_reservations":
16613 utils.CommaJoin(pool.GetExternalReservations()),
16614 }
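# Illustrative example of the resulting mapping (values depend on the pool's
# state and on the exact format returned by AddressPool.GetMap):
#   {"free_count": 122, "reserved_count": 6, "map": "XXX.....X.....",
#    "external_reservations": "192.0.2.1, 192.0.2.9"}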
16617 class LUNetworkQuery(NoHooksLU):
16618 """Logical unit for querying networks.
16623 def CheckArguments(self):
16624 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16625 self.op.output_fields, self.op.use_locking)
16627 def ExpandNames(self):
16628 self.nq.ExpandNames(self)
16630 def Exec(self, feedback_fn):
16631 return self.nq.OldStyleQuery(self)
16634 class LUNetworkConnect(LogicalUnit):
16635 """Connect a network to a nodegroup
16638 HPATH = "network-connect"
16639 HTYPE = constants.HTYPE_NETWORK
16642 def ExpandNames(self):
16643 self.network_name = self.op.network_name
16644 self.group_name = self.op.group_name
16645 self.network_mode = self.op.network_mode
16646 self.network_link = self.op.network_link
16648 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16649 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16651 self.needed_locks = {
16652 locking.LEVEL_INSTANCE: [],
16653 locking.LEVEL_NODEGROUP: [self.group_uuid],
16654 }
16655 self.share_locks[locking.LEVEL_INSTANCE] = 1
16657 if self.op.conflicts_check:
16658 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16659 self.share_locks[locking.LEVEL_NETWORK] = 1
16661 def DeclareLocks(self, level):
16662 if level == locking.LEVEL_INSTANCE:
16663 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16665 # Lock instances optimistically, needs verification once group lock has
16666 # been acquired
16667 if self.op.conflicts_check:
16668 self.needed_locks[locking.LEVEL_INSTANCE] = \
16669 self.cfg.GetNodeGroupInstances(self.group_uuid)
16671 def BuildHooksEnv(self):
16672 ret = {
16673 "GROUP_NAME": self.group_name,
16674 "GROUP_NETWORK_MODE": self.network_mode,
16675 "GROUP_NETWORK_LINK": self.network_link,
16679 def BuildHooksNodes(self):
16680 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16681 return (nodes, nodes)
16683 def CheckPrereq(self):
16684 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16686 assert self.group_uuid in owned_groups
16688 # Check if locked instances are still correct
16689 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16690 if self.op.conflicts_check:
16691 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16693 self.netparams = {
16694 constants.NIC_MODE: self.network_mode,
16695 constants.NIC_LINK: self.network_link,
16696 }
16697 objects.NIC.CheckParameterSyntax(self.netparams)
16699 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16700 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16701 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16702 self.connected = False
16703 if self.network_uuid in self.group.networks:
16704 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16705 (self.network_name, self.group.name))
16706 self.connected = True
16708 # check only if not already connected
16709 elif self.op.conflicts_check:
16710 pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16712 _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16713 "connect to", owned_instances)
16715 def Exec(self, feedback_fn):
16716 # Connect the network and update the group only if not already connected
16717 if not self.connected:
16718 self.group.networks[self.network_uuid] = self.netparams
16719 self.cfg.Update(self.group, feedback_fn)
16722 def _NetworkConflictCheck(lu, check_fn, action, instances):
16723 """Checks for network interface conflicts with a network.
16725 @type lu: L{LogicalUnit}
16726 @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16727 returning boolean
16728 @param check_fn: Function checking for conflict
16729 @type action: string
16730 @param action: Part of error message (see code)
16731 @raise errors.OpPrereqError: If conflicting IP addresses are found.
16733 """
16734 conflicts = []
16736 for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
16737 instconflicts = [(idx, nic.ip)
16738 for (idx, nic) in enumerate(instance.nics)
16739 if check_fn(nic)]
16741 if instconflicts:
16742 conflicts.append((instance.name, instconflicts))
16744 if conflicts:
16745 lu.LogWarning("IP addresses from network '%s', which is about to %s"
16746 " node group '%s', are in use: %s" %
16747 (lu.network_name, action, lu.group.name,
16748 utils.CommaJoin(("%s: %s" %
16749 (name, _FmtNetworkConflict(details)))
16750 for (name, details) in conflicts)))
16752 raise errors.OpPrereqError("Conflicting IP addresses found;"
16753 " remove/modify the corresponding network"
16754 " interfaces", errors.ECODE_STATE)
16757 def _FmtNetworkConflict(details):
16758 """Utility for L{_NetworkConflictCheck}.
16761 return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16762 for (idx, ipaddr) in details)
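# Example: details=[(0, "198.51.100.5"), (2, "198.51.100.7")] is rendered as
# "nic0/198.51.100.5, nic2/198.51.100.7".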
16765 class LUNetworkDisconnect(LogicalUnit):
16766 """Disconnect a network to a nodegroup
16769 HPATH = "network-disconnect"
16770 HTYPE = constants.HTYPE_NETWORK
16773 def ExpandNames(self):
16774 self.network_name = self.op.network_name
16775 self.group_name = self.op.group_name
16777 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16778 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16780 self.needed_locks = {
16781 locking.LEVEL_INSTANCE: [],
16782 locking.LEVEL_NODEGROUP: [self.group_uuid],
16783 }
16784 self.share_locks[locking.LEVEL_INSTANCE] = 1
16786 def DeclareLocks(self, level):
16787 if level == locking.LEVEL_INSTANCE:
16788 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16790 # Lock instances optimistically, needs verification once group lock has
16791 # been acquired
16792 self.needed_locks[locking.LEVEL_INSTANCE] = \
16793 self.cfg.GetNodeGroupInstances(self.group_uuid)
16795 def BuildHooksEnv(self):
16796 ret = {
16797 "GROUP_NAME": self.group_name,
16798 }
16799 return ret
16801 def BuildHooksNodes(self):
16802 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16803 return (nodes, nodes)
16805 def CheckPrereq(self):
16806 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16808 assert self.group_uuid in owned_groups
16810 # Check if locked instances are still correct
16811 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16812 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16814 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16815 self.connected = True
16816 if self.network_uuid not in self.group.networks:
16817 self.LogWarning("Network '%s' is not mapped to group '%s'",
16818 self.network_name, self.group.name)
16819 self.connected = False
16821 # The conflict check is only needed if the network is actually connected
16822 else:
16823 _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
16824 "disconnect from", owned_instances)
16826 def Exec(self, feedback_fn):
16827 # Disconnect the network and update the group only if network is connected
16828 if self.connected:
16829 del self.group.networks[self.network_uuid]
16830 self.cfg.Update(self.group, feedback_fn)
16833 #: Query type implementations
16834 _QUERY_IMPL = {
16835 constants.QR_CLUSTER: _ClusterQuery,
16836 constants.QR_INSTANCE: _InstanceQuery,
16837 constants.QR_NODE: _NodeQuery,
16838 constants.QR_GROUP: _GroupQuery,
16839 constants.QR_NETWORK: _NetworkQuery,
16840 constants.QR_OS: _OsQuery,
16841 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16842 constants.QR_EXPORT: _ExportQuery,
16843 }
16845 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
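# This assert guards against adding a new opcode-visible query resource to
# constants.QR_VIA_OP without providing a matching _QueryBase implementation
# in the mapping above.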
16848 def _GetQueryImplementation(name):
16849 """Returns the implemtnation for a query type.
16851 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16855 return _QUERY_IMPL[name]
16857 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16858 errors.ECODE_INVAL)
16861 def _CheckForConflictingIp(lu, ip, node):
16862 """In case of conflicting IP address raise error.
16865 @param ip: IP address
16867 @param node: node name
16870 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16871 if conf_net is not None:
16872 raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
16873 (ip, conf_net)),
16874 errors.ECODE_STATE)
16876 return (None, None)