# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module
import copy
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
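
# Illustrative usage sketch (not part of the original module): an LU's Exec
# can hand follow-up jobs to the processor; OpTestDelay is used here only as
# a convenient example opcode, and "other_value" is a hypothetical extra
# return value.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)]]
#     return ResultWithJobs(jobs, other_value=42)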


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.add_locks = {}
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values, like this:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really being called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
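
# A minimal sketch (hypothetical LU, not from the original module) tying the
# two helpers above together: the instance lock is taken in ExpandNames, the
# node locks are computed later in DeclareLocks.
#
#   class LUMyInstanceOp(NoHooksLU):
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()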


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
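
# Usage sketch: a read-only LU can declare every lock level shared in its
# ExpandNames with a single statement:
#
#   self.share_locks = _ShareAll()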


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @rtype: bool
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
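
# Worked example (illustrative values):
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   -> {"b": 2, "c": 3}   # "a" falls back to its default, "c" is added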


def _UpdateMinMaxISpecs(ipolicy, new_minmax, group_policy):
  use_none = use_default = group_policy
  minmax = ipolicy.setdefault(constants.ISPECS_MINMAX, {})
  for (key, value) in new_minmax.items():
    if key not in constants.ISPECS_MINMAX_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy/%s: %s" %
                                 (constants.ISPECS_MINMAX, key),
                                 errors.ECODE_INVAL)
    old_spec = minmax.get(key, {})
    minmax[key] = _GetUpdatedParams(old_spec, value, use_none=use_none,
                                    use_default=use_default)
    utils.ForceDictType(minmax[key], constants.ISPECS_PARAMETER_TYPES)


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key == constants.ISPECS_MINMAX:
      _UpdateMinMaxISpecs(ipolicy, value, group_policy)
    elif key == constants.ISPECS_STD:
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
      utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
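
# Usage sketch: after an LU has narrowed down the nodes it actually works
# on, it can drop the remaining node locks; the attribute names here are
# illustrative only:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.op.pnode, self.op.snode])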


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name
      as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
1009 """Runs the post-hook for an opcode on a single node.
1012 hm = lu.proc.BuildHooksManager(lu)
1014 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
1015 except Exception, err: # pylint: disable=W0703
1016 lu.LogWarning("Errors occurred running hooks on %s: %s",


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
  """Make sure that none of the given parameters is global.

  If a global parameter is found, an L{errors.OpPrereqError} exception is
  raised. This is used to avoid setting global parameters for individual nodes.

  @type params: dictionary
  @param params: Parameters to check
  @type glob_pars: dictionary
  @param glob_pars: Forbidden parameters
  @type kind: string
  @param kind: Kind of parameters (e.g. "node")
  @type bad_levels: string
  @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
      "instance")
  @type good_levels: strings
  @param good_levels: Level(s) at which the parameters are allowed (e.g.
      "cluster or group")

  """
  used_globals = glob_pars.intersection(params)
  if used_globals:
    msg = ("The following %s parameters are global and cannot"
           " be customized at %s level, please modify them at"
           " %s level: %s" %
           (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
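
# Usage sketch (illustrative opcode attribute): rejecting per-node overrides
# of cluster-global node parameters could look like:
#
#   _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
#                         "node", "cluster or group")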


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # use on the PV)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ispecs, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ispecs: dictionary containing min and max values
  @param value: actual value that we want to use
  @return: None or an error string

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ispecs[constants.ISPECS_MAX].get(name, value)
  min_v = ispecs[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
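
# Worked example (illustrative values): with a min/max of 128/4096 for a
# spec, a value of 8192 is rejected:
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "",
#                      {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#                       constants.ISPECS_MAX:
#                         {constants.ISPEC_MEM_SIZE: 4096}},
#                      8192)
#   -> "<name> value 8192 is not in range [128, 4096]"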


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 disk_template,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @type disk_template: string
  @param disk_template: The disk template of the instance
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]
  if disk_template != constants.DT_DISKLESS:
    # This check doesn't make sense for diskless instances
    test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
  ret = []
  allowed_dts = ipolicy[constants.IPOLICY_DTS]
  if disk_template not in allowed_dts:
    ret.append("Disk template %s is not allowed (allowed templates: %s)" %
               (disk_template, utils.CommaJoin(allowed_dts)))

  minmax = ipolicy[constants.ISPECS_MINMAX]
  return ret + filter(None,
                      (_compute_fn(name, qualifier, minmax, value)
                       for (name, qualifier, value) in test_settings))
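
# Sketch (illustrative values): checking a prospective instance against an
# ipolicy; an empty result means no violations:
#
#   violations = _ComputeIPolicySpecViolation(
#     ipolicy, mem_size=1024, cpu_count=2, disk_count=1, nic_count=1,
#     disk_sizes=[10240], spindle_use=1, disk_template=constants.DT_PLAIN)
#   if violations:
#     raise errors.OpPrereqError(utils.CommaJoin(violations),
#                                errors.ECODE_INVAL)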


def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  be_full = cfg.GetClusterInfo().FillBE(instance)
  mem_size = be_full[constants.BE_MAXMEM]
  cpu_count = be_full[constants.BE_VCPUS]
  spindle_use = be_full[constants.BE_SPINDLE_USE]
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)
  disk_template = instance.disk_template

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use, disk_template)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, disk_template,
  _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use, disk_template)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group, cfg,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance, cfg)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @return: A list of instances which violates the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
          _ComputeViolatingInstances(old_ipolicy, instances, cfg))
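
# Note on the set arithmetic above: only instances that violate the new
# policy but not the old one are reported, so pre-existing violations don't
# block a policy change. Usage sketch:
#
#   new_violations = _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy,
#                                                  instances, cfg)
#   if new_violations:
#     self.LogWarning("Instances that would start violating the policy: %s",
#                     utils.CommaJoin(new_violations))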


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
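
# Example (illustrative names): both wrappers resolve a possibly-short name
# to the canonical one stored in the configuration:
#
#   _ExpandInstanceName(self.cfg, "instance1")
#   -> "instance1.example.com"   # or raises OpPrereqError if unknown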


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         mac_prefix, tags):
  """Builds network related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
      elif net:
        # FIXME: broken network reference: the instance NIC specifies a
        # network, but the relevant network entry was not in the config. This
        # should be made impossible.
        env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  netinfo = None
  if nic.network:
    nobj = lu.cfg.GetNetwork(nic.network)
    netinfo = objects.Network.ToDict(nobj)
  return (nic.ip, nic.mac, mode, link, nic.network, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances, cfg):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: L{objects.Instance}
  @param instances: List of instances to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
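
# Example (illustrative name/DNS): if "inst1" resolves to
# "inst1.example.com", then:
#
#   hostname = _CheckHostnameSane(self, "inst1")
#   hostname.name == "inst1.example.com"
#
# whereas a name that is not a prefix of what it resolves to raises
# errors.OpPrereqError.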


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
1917 class LUClusterDestroy(LogicalUnit):
1918 """Logical unit for destroying the cluster.
1921 HPATH = "cluster-destroy"
1922 HTYPE = constants.HTYPE_CLUSTER
1924 def BuildHooksEnv(self):
1929 "OP_TARGET": self.cfg.GetClusterName(),
1932 def BuildHooksNodes(self):
1933 """Build hooks nodes.
1938 def CheckPrereq(self):
1939 """Check prerequisites.
1941 This checks whether the cluster is empty.
1943 Any errors are signaled by raising errors.OpPrereqError.
1946 master = self.cfg.GetMasterNode()
1948 nodelist = self.cfg.GetNodeList()
1949 if len(nodelist) != 1 or nodelist[0] != master:
1950 raise errors.OpPrereqError("There are still %d node(s) in"
1951 " this cluster." % (len(nodelist) - 1),
1953 instancelist = self.cfg.GetInstanceList()
1955 raise errors.OpPrereqError("There are still %d instance(s) in"
1956 " this cluster." % len(instancelist),
1959 def Exec(self, feedback_fn):
1960 """Destroys the cluster.
1963 master_params = self.cfg.GetMasterNetworkParameters()
1965 # Run post hooks on master node before it's removed
1966 _RunPostHook(self, master_params.name)
1968 ems = self.cfg.GetUseExternalMipScript()
1969 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1972 self.LogWarning("Error disabling the master IP address: %s",
1975 return master_params.name
1978 def _VerifyCertificate(filename):
1979 """Verifies a certificate for L{LUClusterVerifyConfig}.
1981 @type filename: string
1982 @param filename: Path to PEM file
1986 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1987 utils.ReadFile(filename))
1988 except Exception, err: # pylint: disable=W0703
1989 return (LUClusterVerifyConfig.ETYPE_ERROR,
1990 "Failed to load X509 certificate %s: %s" % (filename, err))
1993 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1994 constants.SSL_CERT_EXPIRATION_ERROR)
1997 fnamemsg = "While verifying %s: %s" % (filename, msg)
2002 return (None, fnamemsg)
2003 elif errcode == utils.CERT_WARNING:
2004 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
2005 elif errcode == utils.CERT_ERROR:
2006 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
2008 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
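# Usage sketch, mirroring LUClusterVerifyConfig.Exec further down: iterate
# over the known certificate files and report every non-None error type.
#
#   for fname in pathutils.ALL_CERT_FILES:
#     (etype, msg) = _VerifyCertificate(fname)
#     if etype is not None:
#       feedback_fn(" - %s: %s" % (etype, msg))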
2011 def _GetAllHypervisorParameters(cluster, instances):
2012 """Compute the set of all hypervisor parameters.
2014 @type cluster: L{objects.Cluster}
2015 @param cluster: the cluster object
2016 @param instances: list of L{objects.Instance}
2017 @param instances: additional instances from which to obtain parameters
2018 @rtype: list of (origin, hypervisor, parameters)
2019 @return: a list with all parameters found, indicating the hypervisor they
2020 apply to, and the origin (can be "cluster", "os X", or "instance Y")
2025 for hv_name in cluster.enabled_hypervisors:
2026 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2028 for os_name, os_hvp in cluster.os_hvp.items():
2029 for hv_name, hv_params in os_hvp.items():
2031 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2032 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2034 # TODO: collapse identical parameter values in a single one
2035 for instance in instances:
2036 if instance.hvparams:
2037 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2038 cluster.FillHV(instance)))
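# The result is a flat list of (origin, hypervisor, parameters) triples,
# e.g. (illustrative values only):
#
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-8", "xen-pvm", {...}),
#    ("instance web1", "kvm", {...})]
#
# so consumers such as LUClusterVerifyConfig._VerifyHVP can check every
# parameter set without caring where it came from.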
2043 class _VerifyErrors(object):
2044 """Mix-in for cluster/group verify LUs.
2046 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2047 self.op and self._feedback_fn to be available.)
2051 ETYPE_FIELD = "code"
2052 ETYPE_ERROR = "ERROR"
2053 ETYPE_WARNING = "WARNING"
2055 def _Error(self, ecode, item, msg, *args, **kwargs):
2056 """Format an error message.
2058 Based on the opcode's error_codes parameter, either format a
2059 parseable error code, or a simpler error string.
2061 This must be called only from Exec and functions called from Exec.
2064 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2065 itype, etxt, _ = ecode
2066 # If the error code is in the list of ignored errors, demote the error to a
2068 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2069 ltype = self.ETYPE_WARNING
2070 # first complete the msg
2073 # then format the whole message
2074 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2075 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2081 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2082 # and finally report it via the feedback_fn
2083 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2084 # do not mark the operation as failed when only warnings are raised
2085 if ltype == self.ETYPE_ERROR:
2088 def _ErrorIf(self, cond, *args, **kwargs):
2089 """Log an error message if the passed condition is True.
2093 or self.op.debug_simulate_errors): # pylint: disable=E1101
2094 self._Error(*args, **kwargs)
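# A minimal usage sketch (hypothetical condition and item): a verifying LU
# reports through the mix-in as
#
#   self._ErrorIf(test_failed, constants.CV_ENODERPC, node_name,
#                 "while contacting node: %s", msg)
#
# CV_* codes unpack as (item-type, code, description); with error_codes
# set, the line is emitted machine-parseable as
# "ERROR:ENODERPC:node:<item>:<message>", otherwise in the plainer
# "ERROR: node <item>: <message>" form built above.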
2097 class LUClusterVerify(NoHooksLU):
2098 """Submits all jobs necessary to verify the cluster.
2103 def ExpandNames(self):
2104 self.needed_locks = {}
2106 def Exec(self, feedback_fn):
2109 if self.op.group_name:
2110 groups = [self.op.group_name]
2111 depends_fn = lambda: None
2113 groups = self.cfg.GetNodeGroupList()
2115 # Verify global configuration
2117 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2120 # Always depend on global verification
2121 depends_fn = lambda: [(-len(jobs), [])]
2124 [opcodes.OpClusterVerifyGroup(group_name=group,
2125 ignore_errors=self.op.ignore_errors,
2126 depends=depends_fn())]
2127 for group in groups)
2129 # Fix up all parameters
2130 for op in itertools.chain(*jobs): # pylint: disable=W0142
2131 op.debug_simulate_errors = self.op.debug_simulate_errors
2132 op.verbose = self.op.verbose
2133 op.error_codes = self.op.error_codes
2135 op.skip_checks = self.op.skip_checks
2136 except AttributeError:
2137 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2139 return ResultWithJobs(jobs)
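# The depends entry above uses the job queue's relative syntax: a negative
# job ID of -len(jobs) points each per-group OpClusterVerifyGroup at the
# job submitted that many positions earlier in the same submission, i.e.
# the OpClusterVerifyConfig job; the second element lists the final job
# statuses accepted for the dependency (assumed here: an empty list places
# no restriction on them).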
2142 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2143 """Verifies the cluster config.
2148 def _VerifyHVP(self, hvp_data):
2149 """Verifies locally the syntax of the hypervisor parameters.
2152 for item, hv_name, hv_params in hvp_data:
2153 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2156 hv_class = hypervisor.GetHypervisorClass(hv_name)
2157 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2158 hv_class.CheckParameterSyntax(hv_params)
2159 except errors.GenericError, err:
2160 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2162 def ExpandNames(self):
2163 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2164 self.share_locks = _ShareAll()
2166 def CheckPrereq(self):
2167 """Check prerequisites.
2170 # Retrieve all information
2171 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2172 self.all_node_info = self.cfg.GetAllNodesInfo()
2173 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2175 def Exec(self, feedback_fn):
2176 """Verify integrity of cluster, performing various tests on nodes.
2180 self._feedback_fn = feedback_fn
2182 feedback_fn("* Verifying cluster config")
2184 for msg in self.cfg.VerifyConfig():
2185 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2187 feedback_fn("* Verifying cluster certificate files")
2189 for cert_filename in pathutils.ALL_CERT_FILES:
2190 (errcode, msg) = _VerifyCertificate(cert_filename)
2191 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2193 feedback_fn("* Verifying hypervisor parameters")
2195 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2196 self.all_inst_info.values()))
2198 feedback_fn("* Verifying all nodes belong to an existing group")
2200 # We do this verification here because, should this bogus circumstance
2201 # occur, it would never be caught by VerifyGroup, which only acts on
2202 # nodes/instances reachable from existing node groups.
2204 dangling_nodes = set(node.name for node in self.all_node_info.values()
2205 if node.group not in self.all_group_info)
2207 dangling_instances = {}
2208 no_node_instances = []
2210 for inst in self.all_inst_info.values():
2211 if inst.primary_node in dangling_nodes:
2212 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2213 elif inst.primary_node not in self.all_node_info:
2214 no_node_instances.append(inst.name)
2219 utils.CommaJoin(dangling_instances.get(node.name,
2221 for node in dangling_nodes]
2223 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2225 "the following nodes (and their instances) belong to a non"
2226 " existing group: %s", utils.CommaJoin(pretty_dangling))
2228 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2230 "the following instances have a non-existing primary-node:"
2231 " %s", utils.CommaJoin(no_node_instances))
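# A toy walk-through of the dangling checks above (illustrative data):
# with all_group_info == {"g1": ...} and a node "n2" whose group is "g9",
# "n2" is dangling and every instance whose primary node is "n2" is listed
# next to it, while an instance pointing at a completely unknown primary
# node ends up in no_node_instances instead.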
2236 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2237 """Verifies the status of a node group.
2240 HPATH = "cluster-verify"
2241 HTYPE = constants.HTYPE_CLUSTER
2244 _HOOKS_INDENT_RE = re.compile("^", re.M)
2246 class NodeImage(object):
2247 """A class representing the logical and physical status of a node.
2250 @ivar name: the node name to which this object refers
2251 @ivar volumes: a structure as returned from
2252 L{ganeti.backend.GetVolumeList} (runtime)
2253 @ivar instances: a list of running instances (runtime)
2254 @ivar pinst: list of configured primary instances (config)
2255 @ivar sinst: list of configured secondary instances (config)
2256 @ivar sbp: dictionary of {primary-node: list of instances} for all
2257 instances for which this node is secondary (config)
2258 @ivar mfree: free memory, as reported by hypervisor (runtime)
2259 @ivar dfree: free disk, as reported by the node (runtime)
2260 @ivar offline: the offline status (config)
2261 @type rpc_fail: boolean
2262 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2263 not whether the individual keys were correct) (runtime)
2264 @type lvm_fail: boolean
2265 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2266 @type hyp_fail: boolean
2267 @ivar hyp_fail: whether the RPC call didn't return the instance list
2268 @type ghost: boolean
2269 @ivar ghost: whether this is a known node or not (config)
2270 @type os_fail: boolean
2271 @ivar os_fail: whether the RPC call didn't return valid OS data
2273 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2274 @type vm_capable: boolean
2275 @ivar vm_capable: whether the node can host instances
2277 @ivar pv_min: size in MiB of the smallest PVs
2279 @ivar pv_max: size in MiB of the biggest PVs
2282 def __init__(self, offline=False, name=None, vm_capable=True):
2291 self.offline = offline
2292 self.vm_capable = vm_capable
2293 self.rpc_fail = False
2294 self.lvm_fail = False
2295 self.hyp_fail = False
2297 self.os_fail = False
2302 def ExpandNames(self):
2303 # This raises errors.OpPrereqError on its own:
2304 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2306 # Get instances in node group; this is unsafe and needs verification later
2308 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2310 self.needed_locks = {
2311 locking.LEVEL_INSTANCE: inst_names,
2312 locking.LEVEL_NODEGROUP: [self.group_uuid],
2313 locking.LEVEL_NODE: [],
2315 # This opcode is run by watcher every five minutes and acquires all nodes
2316 # for a group. It doesn't run for a long time, so it's better to acquire
2317 # the node allocation lock as well.
2318 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2321 self.share_locks = _ShareAll()
2323 def DeclareLocks(self, level):
2324 if level == locking.LEVEL_NODE:
2325 # Get members of node group; this is unsafe and needs verification later
2326 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2328 all_inst_info = self.cfg.GetAllInstancesInfo()
2330 # In Exec(), we warn about mirrored instances that have primary and
2331 # secondary living in separate node groups. To fully verify that
2332 # volumes for these instances are healthy, we will need to do an
2333 # extra call to their secondaries. We ensure here those nodes will
2335 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2336 # Important: access only the instances whose lock is owned
2337 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2338 nodes.update(all_inst_info[inst].secondary_nodes)
2340 self.needed_locks[locking.LEVEL_NODE] = nodes
2342 def CheckPrereq(self):
2343 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2344 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2346 group_nodes = set(self.group_info.members)
2348 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2351 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2353 unlocked_instances = \
2354 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2357 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2358 utils.CommaJoin(unlocked_nodes),
2361 if unlocked_instances:
2362 raise errors.OpPrereqError("Missing lock for instances: %s" %
2363 utils.CommaJoin(unlocked_instances),
2366 self.all_node_info = self.cfg.GetAllNodesInfo()
2367 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2369 self.my_node_names = utils.NiceSort(group_nodes)
2370 self.my_inst_names = utils.NiceSort(group_instances)
2372 self.my_node_info = dict((name, self.all_node_info[name])
2373 for name in self.my_node_names)
2375 self.my_inst_info = dict((name, self.all_inst_info[name])
2376 for name in self.my_inst_names)
2378 # We detect here the nodes that will need the extra RPC calls for verifying
2379 # split LV volumes; they should be locked.
2380 extra_lv_nodes = set()
2382 for inst in self.my_inst_info.values():
2383 if inst.disk_template in constants.DTS_INT_MIRROR:
2384 for nname in inst.all_nodes:
2385 if self.all_node_info[nname].group != self.group_uuid:
2386 extra_lv_nodes.add(nname)
2388 unlocked_lv_nodes = \
2389 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2391 if unlocked_lv_nodes:
2392 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2393 utils.CommaJoin(unlocked_lv_nodes),
2395 self.extra_lv_nodes = list(extra_lv_nodes)
2397 def _VerifyNode(self, ninfo, nresult):
2398 """Perform some basic validation on data returned from a node.
2400 - check the result data structure is well formed and has all the
2402 - check ganeti version
2404 @type ninfo: L{objects.Node}
2405 @param ninfo: the node to check
2406 @param nresult: the results from the node
2408 @return: whether overall this call was successful (and we can expect
2409 reasonable values in the response)
2413 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2415 # main result, nresult should be a non-empty dict
2416 test = not nresult or not isinstance(nresult, dict)
2417 _ErrorIf(test, constants.CV_ENODERPC, node,
2418 "unable to verify node: no data returned")
2422 # compares ganeti version
2423 local_version = constants.PROTOCOL_VERSION
2424 remote_version = nresult.get("version", None)
2425 test = not (remote_version and
2426 isinstance(remote_version, (list, tuple)) and
2427 len(remote_version) == 2)
2428 _ErrorIf(test, constants.CV_ENODERPC, node,
2429 "connection to node returned invalid data")
2433 test = local_version != remote_version[0]
2434 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2435 "incompatible protocol versions: master %s,"
2436 " node %s", local_version, remote_version[0])
2440 # node seems compatible, we can actually try to look into its results
2442 # full package version
2443 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2444 constants.CV_ENODEVERSION, node,
2445 "software version mismatch: master %s, node %s",
2446 constants.RELEASE_VERSION, remote_version[1],
2447 code=self.ETYPE_WARNING)
2449 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2450 if ninfo.vm_capable and isinstance(hyp_result, dict):
2451 for hv_name, hv_result in hyp_result.iteritems():
2452 test = hv_result is not None
2453 _ErrorIf(test, constants.CV_ENODEHV, node,
2454 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2456 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2457 if ninfo.vm_capable and isinstance(hvp_result, list):
2458 for item, hv_name, hv_result in hvp_result:
2459 _ErrorIf(True, constants.CV_ENODEHV, node,
2460 "hypervisor %s parameter verify failure (source %s): %s",
2461 hv_name, item, hv_result)
2463 test = nresult.get(constants.NV_NODESETUP,
2464 ["Missing NODESETUP results"])
2465 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2470 def _VerifyNodeTime(self, ninfo, nresult,
2471 nvinfo_starttime, nvinfo_endtime):
2472 """Check the node time.
2474 @type ninfo: L{objects.Node}
2475 @param ninfo: the node to check
2476 @param nresult: the remote results for the node
2477 @param nvinfo_starttime: the start time of the RPC call
2478 @param nvinfo_endtime: the end time of the RPC call
2482 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2484 ntime = nresult.get(constants.NV_TIME, None)
2486 ntime_merged = utils.MergeTime(ntime)
2487 except (ValueError, TypeError):
2488 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2491 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2492 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2493 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2494 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2498 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2499 "Node time diverges by at least %s from master node time",
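  # Sketch of the rule above: with T the merged remote timestamp and
  # [start, end] the local window around the RPC, the node passes iff
  #
  #   start - NODE_MAX_CLOCK_SKEW <= T <= end + NODE_MAX_CLOCK_SKEW
  #
  # and on failure the divergence is reported against the nearer end of
  # [start, end], which is why the message says "at least".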
2502 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2503 """Check the node LVM results and update info for cross-node checks.
2505 @type ninfo: L{objects.Node}
2506 @param ninfo: the node to check
2507 @param nresult: the remote results for the node
2508 @param vg_name: the configured VG name
2509 @type nimg: L{NodeImage}
2510 @param nimg: node image
2517 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2519 # checks vg existence and size > 20G
2520 vglist = nresult.get(constants.NV_VGLIST, None)
2522 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2524 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2525 constants.MIN_VG_SIZE)
2526 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2529 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2531 self._Error(constants.CV_ENODELVM, node, em)
2532 if pvminmax is not None:
2533 (nimg.pv_min, nimg.pv_max) = pvminmax
2535 def _VerifyGroupLVM(self, node_image, vg_name):
2536 """Check cross-node consistency in LVM.
2538 @type node_image: dict
2539 @param node_image: info about nodes, mapping from node to names to
2540 L{NodeImage} objects
2541 @param vg_name: the configured VG name
2547 # Only exclusive storage needs this kind of check
2548 if not self._exclusive_storage:
2551 # exclusive_storage wants all PVs to have the same size (approximately),
2552 # if the smallest and the biggest ones are okay, everything is fine.
2553 # pv_min is None iff pv_max is None
2554 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2557 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2558 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2559 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2560 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2561 "PV sizes differ too much in the group; smallest (%s MB) is"
2562 " on %s, biggest (%s MB) is on %s",
2563 pvmin, minnode, pvmax, maxnode)
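  # Only nodes that actually reported PV data take part in the comparison
  # above (pv_min is None iff pv_max is None), so e.g. two nodes reporting
  # (pv_min, pv_max) of (10240, 10240) and (10240, 20480) MiB yield the
  # group-wide extremes 10240/20480, which are flagged if that spread
  # exceeds what utils.LvmExclusiveTestBadPvSizes allows.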
2565 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2566 """Check the node bridges.
2568 @type ninfo: L{objects.Node}
2569 @param ninfo: the node to check
2570 @param nresult: the remote results for the node
2571 @param bridges: the expected list of bridges
2578 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2580 missing = nresult.get(constants.NV_BRIDGES, None)
2581 test = not isinstance(missing, list)
2582 _ErrorIf(test, constants.CV_ENODENET, node,
2583 "did not return valid bridge information")
2585 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2586 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2588 def _VerifyNodeUserScripts(self, ninfo, nresult):
2589 """Check the results of user script presence and executability on the node
2591 @type ninfo: L{objects.Node}
2592 @param ninfo: the node to check
2593 @param nresult: the remote results for the node
2598 test = not constants.NV_USERSCRIPTS in nresult
2599 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2600 "did not return user scripts information")
2602 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2604 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2605 "user scripts not present or not executable: %s" %
2606 utils.CommaJoin(sorted(broken_scripts)))
2608 def _VerifyNodeNetwork(self, ninfo, nresult):
2609 """Check the node network connectivity results.
2611 @type ninfo: L{objects.Node}
2612 @param ninfo: the node to check
2613 @param nresult: the remote results for the node
2617 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 test = constants.NV_NODELIST not in nresult
2620 _ErrorIf(test, constants.CV_ENODESSH, node,
2621 "node hasn't returned node ssh connectivity data")
2623 if nresult[constants.NV_NODELIST]:
2624 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2625 _ErrorIf(True, constants.CV_ENODESSH, node,
2626 "ssh communication with node '%s': %s", a_node, a_msg)
2628 test = constants.NV_NODENETTEST not in nresult
2629 _ErrorIf(test, constants.CV_ENODENET, node,
2630 "node hasn't returned node tcp connectivity data")
2632 if nresult[constants.NV_NODENETTEST]:
2633 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2635 _ErrorIf(True, constants.CV_ENODENET, node,
2636 "tcp communication with node '%s': %s",
2637 anode, nresult[constants.NV_NODENETTEST][anode])
2639 test = constants.NV_MASTERIP not in nresult
2640 _ErrorIf(test, constants.CV_ENODENET, node,
2641 "node hasn't returned node master IP reachability data")
2643 if not nresult[constants.NV_MASTERIP]:
2644 if node == self.master_node:
2645 msg = "the master node cannot reach the master IP (not configured?)"
2647 msg = "cannot reach the master IP"
2648 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2650 def _VerifyInstance(self, instance, inst_config, node_image,
2652 """Verify an instance.
2654 This function checks to see if the required block devices are
2655 available on the instance's node, and that the nodes are in the correct
2659 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2660 pnode = inst_config.primary_node
2661 pnode_img = node_image[pnode]
2662 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2664 node_vol_should = {}
2665 inst_config.MapLVsByNode(node_vol_should)
2667 cluster = self.cfg.GetClusterInfo()
2668 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2670 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2671 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2672 code=self.ETYPE_WARNING)
2674 for node in node_vol_should:
2675 n_img = node_image[node]
2676 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2677 # ignore missing volumes on offline or broken nodes
2679 for volume in node_vol_should[node]:
2680 test = volume not in n_img.volumes
2681 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2682 "volume %s missing on node %s", volume, node)
2684 if inst_config.admin_state == constants.ADMINST_UP:
2685 test = instance not in pnode_img.instances and not pnode_img.offline
2686 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2687 "instance not running on its primary node %s",
2689 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2690 "instance is marked as running and lives on offline node %s",
2693 diskdata = [(nname, success, status, idx)
2694 for (nname, disks) in diskstatus.items()
2695 for idx, (success, status) in enumerate(disks)]
2697 for nname, success, bdev_status, idx in diskdata:
2698 # the 'ghost node' construction in Exec() ensures that we have a
2700 snode = node_image[nname]
2701 bad_snode = snode.ghost or snode.offline
2702 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2703 not success and not bad_snode,
2704 constants.CV_EINSTANCEFAULTYDISK, instance,
2705 "couldn't retrieve status for disk/%s on %s: %s",
2706 idx, nname, bdev_status)
2707 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2708 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2709 constants.CV_EINSTANCEFAULTYDISK, instance,
2710 "disk/%s on %s is faulty", idx, nname)
2712 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2713 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2714 " primary node failed", instance)
2716 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2717 constants.CV_EINSTANCELAYOUT,
2718 instance, "instance has multiple secondary nodes: %s",
2719 utils.CommaJoin(inst_config.secondary_nodes),
2720 code=self.ETYPE_WARNING)
2722 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2723 # Disk template not compatible with exclusive_storage: no instance
2724 # node should have the flag set
2725 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2726 inst_config.all_nodes)
2727 es_nodes = [n for (n, es) in es_flags.items()
2729 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2730 "instance has template %s, which is not supported on nodes"
2731 " that have exclusive storage set: %s",
2732 inst_config.disk_template, utils.CommaJoin(es_nodes))
2734 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2735 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2736 instance_groups = {}
2738 for node in instance_nodes:
2739 instance_groups.setdefault(self.all_node_info[node].group,
2743 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2744 # Sort so that we always list the primary node first.
2745 for group, nodes in sorted(instance_groups.items(),
2746 key=lambda (_, nodes): pnode in nodes,
2749 self._ErrorIf(len(instance_groups) > 1,
2750 constants.CV_EINSTANCESPLITGROUPS,
2751 instance, "instance has primary and secondary nodes in"
2752 " different groups: %s", utils.CommaJoin(pretty_list),
2753 code=self.ETYPE_WARNING)
2755 inst_nodes_offline = []
2756 for snode in inst_config.secondary_nodes:
2757 s_img = node_image[snode]
2758 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2759 snode, "instance %s, connection to secondary node failed",
2763 inst_nodes_offline.append(snode)
2765 # warn that the instance lives on offline nodes
2766 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2767 "instance has offline secondary node(s) %s",
2768 utils.CommaJoin(inst_nodes_offline))
2769 # ... or ghost/non-vm_capable nodes
2770 for node in inst_config.all_nodes:
2771 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2772 instance, "instance lives on ghost node %s", node)
2773 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2774 instance, "instance lives on non-vm_capable node %s", node)
2776 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2777 """Verify if there are any unknown volumes in the cluster.
2779 The .os, .swap and backup volumes are ignored. All other volumes are
2780 reported as unknown.
2782 @type reserved: L{ganeti.utils.FieldSet}
2783 @param reserved: a FieldSet of reserved volume names
2786 for node, n_img in node_image.items():
2787 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2788 self.all_node_info[node].group != self.group_uuid):
2789 # skip non-healthy nodes
2791 for volume in n_img.volumes:
2792 test = ((node not in node_vol_should or
2793 volume not in node_vol_should[node]) and
2794 not reserved.Matches(volume))
2795 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2796 "volume %s is unknown", volume)
2798 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2799 """Verify N+1 Memory Resilience.
2801 Check that if one single node dies we can still start all the
2802 instances it was primary for.
2805 cluster_info = self.cfg.GetClusterInfo()
2806 for node, n_img in node_image.items():
2807 # This code checks that every node which is now listed as
2808 # secondary has enough memory to host all instances it is
2809 # supposed to should a single other node in the cluster fail.
2810 # FIXME: not ready for failover to an arbitrary node
2811 # FIXME: does not support file-backed instances
2812 # WARNING: we currently take into account down instances as well
2813 # as up ones, considering that even if they're down someone
2814 # might want to start them even in the event of a node failure.
2815 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2816 # we're skipping nodes marked offline and nodes in other groups from
2817 # the N+1 warning, since most likely we don't have good memory
2818 # information from them; we already list instances living on such
2819 # nodes, and that's enough warning
2821 #TODO(dynmem): also consider ballooning out other instances
2822 for prinode, instances in n_img.sbp.items():
2824 for instance in instances:
2825 bep = cluster_info.FillBE(instance_cfg[instance])
2826 if bep[constants.BE_AUTO_BALANCE]:
2827 needed_mem += bep[constants.BE_MINMEM]
2828 test = n_img.mfree < needed_mem
2829 self._ErrorIf(test, constants.CV_ENODEN1, node,
2830 "not enough memory to accommodate instance failovers"
2831 " should node %s fail (%dMiB needed, %dMiB available)",
2832 prinode, needed_mem, n_img.mfree)
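  # Toy example for the accounting above: if node N is secondary for
  # instances {a: 512, b: 1024} (BE_MINMEM in MiB, both auto-balanced) of
  # the same primary, then needed_mem == 1536, and n_img.mfree == 1024 on N
  # raises the CV_ENODEN1 error for that primary.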
2835 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2836 (files_all, files_opt, files_mc, files_vm)):
2837 """Verifies file checksums collected from all nodes.
2839 @param errorif: Callback for reporting errors
2840 @param nodeinfo: List of L{objects.Node} objects
2841 @param master_node: Name of master node
2842 @param all_nvinfo: RPC results
2845 # Define functions determining which nodes to consider for a file
2848 (files_mc, lambda node: (node.master_candidate or
2849 node.name == master_node)),
2850 (files_vm, lambda node: node.vm_capable),
2853 # Build mapping from filename to list of nodes which should have the file
2855 for (files, fn) in files2nodefn:
2857 filenodes = nodeinfo
2859 filenodes = filter(fn, nodeinfo)
2860 nodefiles.update((filename,
2861 frozenset(map(operator.attrgetter("name"), filenodes)))
2862 for filename in files)
2864 assert set(nodefiles) == (files_all | files_mc | files_vm)
2866 fileinfo = dict((filename, {}) for filename in nodefiles)
2867 ignore_nodes = set()
2869 for node in nodeinfo:
2871 ignore_nodes.add(node.name)
2874 nresult = all_nvinfo[node.name]
2876 if nresult.fail_msg or not nresult.payload:
2879 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2880 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2881 for (key, value) in fingerprints.items())
2884 test = not (node_files and isinstance(node_files, dict))
2885 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2886 "Node did not return file checksum data")
2888 ignore_nodes.add(node.name)
2891 # Build per-checksum mapping from filename to nodes having it
2892 for (filename, checksum) in node_files.items():
2893 assert filename in nodefiles
2894 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2896 for (filename, checksums) in fileinfo.items():
2897 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2899 # Nodes having the file
2900 with_file = frozenset(node_name
2901 for nodes in fileinfo[filename].values()
2902 for node_name in nodes) - ignore_nodes
2904 expected_nodes = nodefiles[filename] - ignore_nodes
2906 # Nodes missing file
2907 missing_file = expected_nodes - with_file
2909 if filename in files_opt:
2911 errorif(missing_file and missing_file != expected_nodes,
2912 constants.CV_ECLUSTERFILECHECK, None,
2913 "File %s is optional, but it must exist on all or no"
2914 " nodes (not found on %s)",
2915 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2917 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2918 "File %s is missing from node(s) %s", filename,
2919 utils.CommaJoin(utils.NiceSort(missing_file)))
2921 # Warn if a node has a file it shouldn't
2922 unexpected = with_file - expected_nodes
2924 constants.CV_ECLUSTERFILECHECK, None,
2925 "File %s should not exist on node(s) %s",
2926 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2928 # See if there are multiple versions of the file
2929 test = len(checksums) > 1
2931 variants = ["variant %s on %s" %
2932 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2933 for (idx, (checksum, nodes)) in
2934 enumerate(sorted(checksums.items()))]
2938 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2939 "File %s found with %s different checksums (%s)",
2940 filename, len(checksums), "; ".join(variants))
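  # The fileinfo mapping built above associates, per file, each checksum
  # with the set of nodes reporting it, e.g. (illustrative values):
  #
  #   fileinfo["/var/lib/ganeti/config.data"] == {
  #     "3d2f0d7b...": set(["node1", "node2"]),
  #     "8a91c5e2...": set(["node3"]),
  #   }
  #
  # so more than one key per file means diverging copies in the cluster.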
2942 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2944 """Verifies the node DRBD status.
2946 @type ninfo: L{objects.Node}
2947 @param ninfo: the node to check
2948 @param nresult: the remote results for the node
2949 @param instanceinfo: the dict of instances
2950 @param drbd_helper: the configured DRBD usermode helper
2951 @param drbd_map: the DRBD map as returned by
2952 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2956 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2959 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2960 test = (helper_result is None)
2961 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2962 "no drbd usermode helper returned")
2964 status, payload = helper_result
2966 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2967 "drbd usermode helper check unsuccessful: %s", payload)
2968 test = status and (payload != drbd_helper)
2969 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2970 "wrong drbd usermode helper: %s", payload)
2972 # compute the DRBD minors
2974 for minor, instance in drbd_map[node].items():
2975 test = instance not in instanceinfo
2976 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2977 "ghost instance '%s' in temporary DRBD map", instance)
2978 # ghost instance should not be running, but otherwise we
2979 # don't give double warnings (both ghost instance and
2980 # unallocated minor in use)
2982 node_drbd[minor] = (instance, False)
2984 instance = instanceinfo[instance]
2985 node_drbd[minor] = (instance.name,
2986 instance.admin_state == constants.ADMINST_UP)
2988 # and now check them
2989 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2990 test = not isinstance(used_minors, (tuple, list))
2991 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2992 "cannot parse drbd status file: %s", str(used_minors))
2994 # we cannot check drbd status
2997 for minor, (iname, must_exist) in node_drbd.items():
2998 test = minor not in used_minors and must_exist
2999 _ErrorIf(test, constants.CV_ENODEDRBD, node,
3000 "drbd minor %d of instance %s is not active", minor, iname)
3001 for minor in used_minors:
3002 test = minor not in node_drbd
3003 _ErrorIf(test, constants.CV_ENODEDRBD, node,
3004 "unallocated drbd minor %d is in use", minor)
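  # node_drbd built above maps every minor expected on the node to an
  # (instance, should_be_running) pair, e.g. (illustrative)
  # {0: ("inst1", True), 1: ("ghost1", False)}; the two loops then flag,
  # separately, configured minors missing from the node's DRBD status and
  # in-use minors the configuration knows nothing about.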
3006 def _UpdateNodeOS(self, ninfo, nresult, nimg):
3007 """Builds the node OS structures.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nresult: the remote results for the node
3012 @param nimg: the node image object
3016 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3018 remote_os = nresult.get(constants.NV_OSLIST, None)
3019 test = (not isinstance(remote_os, list) or
3020 not compat.all(isinstance(v, list) and len(v) == 7
3021 for v in remote_os))
3023 _ErrorIf(test, constants.CV_ENODEOS, node,
3024 "node hasn't returned valid OS data")
3033 for (name, os_path, status, diagnose,
3034 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3036 if name not in os_dict:
3039 # parameters is a list of lists instead of list of tuples due to
3040 # JSON lacking a real tuple type, fix it:
3041 parameters = [tuple(v) for v in parameters]
3042 os_dict[name].append((os_path, status, diagnose,
3043 set(variants), set(parameters), set(api_ver)))
3045 nimg.oslist = os_dict
3047 def _VerifyNodeOS(self, ninfo, nimg, base):
3048 """Verifies the node OS list.
3050 @type ninfo: L{objects.Node}
3051 @param ninfo: the node to check
3052 @param nimg: the node image object
3053 @param base: the 'template' node we match against (e.g. from the master)
3057 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3059 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3061 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3062 for os_name, os_data in nimg.oslist.items():
3063 assert os_data, "Empty OS status for OS %s?!" % os_name
3064 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3065 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3066 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3067 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3068 "OS '%s' has multiple entries (first one shadows the rest): %s",
3069 os_name, utils.CommaJoin([v[0] for v in os_data]))
3070 # comparisons with the 'base' image
3071 test = os_name not in base.oslist
3072 _ErrorIf(test, constants.CV_ENODEOS, node,
3073 "Extra OS %s not present on reference node (%s)",
3077 assert base.oslist[os_name], "Base node has empty OS status?"
3078 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3080 # base OS is invalid, skipping
3082 for kind, a, b in [("API version", f_api, b_api),
3083 ("variants list", f_var, b_var),
3084 ("parameters", beautify_params(f_param),
3085 beautify_params(b_param))]:
3086 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3087 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3088 kind, os_name, base.name,
3089 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3091 # check any missing OSes
3092 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3093 _ErrorIf(missing, constants.CV_ENODEOS, node,
3094 "OSes present on reference node %s but missing on this node: %s",
3095 base.name, utils.CommaJoin(missing))
3097 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3098 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3100 @type ninfo: L{objects.Node}
3101 @param ninfo: the node to check
3102 @param nresult: the remote results for the node
3103 @type is_master: bool
3104 @param is_master: Whether node is the master node
3110 (constants.ENABLE_FILE_STORAGE or
3111 constants.ENABLE_SHARED_FILE_STORAGE)):
3113 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3115 # This should never happen
3116 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3117 "Node did not return forbidden file storage paths")
3119 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3120 "Found forbidden file storage paths: %s",
3121 utils.CommaJoin(fspaths))
3123 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3124 constants.CV_ENODEFILESTORAGEPATHS, node,
3125 "Node should not have returned forbidden file storage"
3128 def _VerifyOob(self, ninfo, nresult):
3129 """Verifies out of band functionality of a node.
3131 @type ninfo: L{objects.Node}
3132 @param ninfo: the node to check
3133 @param nresult: the remote results for the node
3137 # We just have to verify the paths on master and/or master candidates
3138 # as the oob helper is invoked on the master
3139 if ((ninfo.master_candidate or ninfo.master_capable) and
3140 constants.NV_OOB_PATHS in nresult):
3141 for path_result in nresult[constants.NV_OOB_PATHS]:
3142 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3144 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3145 """Verifies and updates the node volume data.
3147 This function will update a L{NodeImage}'s internal structures
3148 with data from the remote call.
3150 @type ninfo: L{objects.Node}
3151 @param ninfo: the node to check
3152 @param nresult: the remote results for the node
3153 @param nimg: the node image object
3154 @param vg_name: the configured VG name
3158 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3160 nimg.lvm_fail = True
3161 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3164 elif isinstance(lvdata, basestring):
3165 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3166 utils.SafeEncode(lvdata))
3167 elif not isinstance(lvdata, dict):
3168 _ErrorIf(True, constants.CV_ENODELVM, node,
3169 "rpc call to node failed (lvlist)")
3171 nimg.volumes = lvdata
3172 nimg.lvm_fail = False
3174 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3175 """Verifies and updates the node instance list.
3177 If the listing was successful, then updates this node's instance
3178 list. Otherwise, it marks the RPC call as failed for the instance
3181 @type ninfo: L{objects.Node}
3182 @param ninfo: the node to check
3183 @param nresult: the remote results for the node
3184 @param nimg: the node image object
3187 idata = nresult.get(constants.NV_INSTANCELIST, None)
3188 test = not isinstance(idata, list)
3189 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3190 "rpc call to node failed (instancelist): %s",
3191 utils.SafeEncode(str(idata)))
3193 nimg.hyp_fail = True
3195 nimg.instances = idata
3197 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3198 """Verifies and computes a node information map
3200 @type ninfo: L{objects.Node}
3201 @param ninfo: the node to check
3202 @param nresult: the remote results for the node
3203 @param nimg: the node image object
3204 @param vg_name: the configured VG name
3208 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3210 # try to read free memory (from the hypervisor)
3211 hv_info = nresult.get(constants.NV_HVINFO, None)
3212 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3213 _ErrorIf(test, constants.CV_ENODEHV, node,
3214 "rpc call to node failed (hvinfo)")
3217 nimg.mfree = int(hv_info["memory_free"])
3218 except (ValueError, TypeError):
3219 _ErrorIf(True, constants.CV_ENODERPC, node,
3220 "node returned invalid nodeinfo, check hypervisor")
3222 # FIXME: devise a free space model for file based instances as well
3223 if vg_name is not None:
3224 test = (constants.NV_VGLIST not in nresult or
3225 vg_name not in nresult[constants.NV_VGLIST])
3226 _ErrorIf(test, constants.CV_ENODELVM, node,
3227 "node didn't return data for the volume group '%s'"
3228 " - it is either missing or broken", vg_name)
3231 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3232 except (ValueError, TypeError):
3233 _ErrorIf(True, constants.CV_ENODERPC, node,
3234 "node returned invalid LVM info, check LVM status")
3236 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3237 """Gets per-disk status information for all instances.
3239 @type nodelist: list of strings
3240 @param nodelist: Node names
3241 @type node_image: dict of (name, L{objects.Node})
3242 @param node_image: Node objects
3243 @type instanceinfo: dict of (name, L{objects.Instance})
3244 @param instanceinfo: Instance objects
3245 @rtype: {instance: {node: [(success, payload)]}}
3246 @return: a dictionary of per-instance dictionaries with nodes as
3247 keys and disk information as values; the disk information is a
3248 list of tuples (success, payload)
3251 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3254 node_disks_devonly = {}
3255 diskless_instances = set()
3256 diskless = constants.DT_DISKLESS
3258 for nname in nodelist:
3259 node_instances = list(itertools.chain(node_image[nname].pinst,
3260 node_image[nname].sinst))
3261 diskless_instances.update(inst for inst in node_instances
3262 if instanceinfo[inst].disk_template == diskless)
3263 disks = [(inst, disk)
3264 for inst in node_instances
3265 for disk in instanceinfo[inst].disks]
3268 # No need to collect data
3271 node_disks[nname] = disks
3273 # _AnnotateDiskParams makes already copies of the disks
3275 for (inst, dev) in disks:
3276 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3277 self.cfg.SetDiskID(anno_disk, nname)
3278 devonly.append(anno_disk)
3280 node_disks_devonly[nname] = devonly
3282 assert len(node_disks) == len(node_disks_devonly)
3284 # Collect data from all nodes with disks
3285 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3288 assert len(result) == len(node_disks)
3292 for (nname, nres) in result.items():
3293 disks = node_disks[nname]
3296 # No data from this node
3297 data = len(disks) * [(False, "node offline")]
3300 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3301 "while getting disk information: %s", msg)
3303 # No data from this node
3304 data = len(disks) * [(False, msg)]
3307 for idx, i in enumerate(nres.payload):
3308 if isinstance(i, (tuple, list)) and len(i) == 2:
3311 logging.warning("Invalid result from node %s, entry %d: %s",
3313 data.append((False, "Invalid result from the remote node"))
3315 for ((inst, _), status) in zip(disks, data):
3316 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3318 # Add empty entries for diskless instances.
3319 for inst in diskless_instances:
3320 assert inst not in instdisk
3323 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3324 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3325 compat.all(isinstance(s, (tuple, list)) and
3326 len(s) == 2 for s in statuses)
3327 for inst, nnames in instdisk.items()
3328 for nname, statuses in nnames.items())
3330 instdisk_keys = set(instdisk)
3331 instanceinfo_keys = set(instanceinfo)
3332 assert instdisk_keys == instanceinfo_keys, \
3333 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3334 (instdisk_keys, instanceinfo_keys))
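  # The resulting instdisk structure has the shape (illustrative values):
  #
  #   {"inst1": {"node1": [(True, status0), (True, status1)]},
  #    "inst2": {"node2": [(False, "node offline")]}}
  #
  # i.e. per instance and node one (success, payload) pair per disk, which
  # Exec() later hands to _VerifyInstance as its diskstatus argument.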
3339 def _SshNodeSelector(group_uuid, all_nodes):
3340 """Create endless iterators for all potential SSH check hosts.
3343 nodes = [node for node in all_nodes
3344 if (node.group != group_uuid and
3346 keyfunc = operator.attrgetter("group")
3348 return map(itertools.cycle,
3349 [sorted(map(operator.attrgetter("name"), names))
3350 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3354 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3355 """Choose which nodes should talk to which other nodes.
3357 We will make nodes contact all nodes in their group, and one node from
3360 @warning: This algorithm has a known issue if one node group is much
3361 smaller than others (e.g. just one node). In such a case all other
3362 nodes will talk to the single node.
3365 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3366 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3368 return (online_nodes,
3369 dict((name, sorted([i.next() for i in sel]))
3370 for name in online_nodes))
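  # _SshNodeSelector above yields one endless, sorted iterator per foreign
  # node group; every online node of the verified group then draws one name
  # from each iterator, so cross-group SSH checks rotate evenly. Toy run
  # (illustrative): verifying g1 = [a, b, c] against g2 = [x, y] gives
  #
  #   {a: [x], b: [y], c: [x]}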
3372 def BuildHooksEnv(self):
3375 Cluster-Verify hooks are run only in the post phase; their output is
3376 logged in the verify output, and their failure makes the verification fail.
3380 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3383 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3384 for node in self.my_node_info.values())
3388 def BuildHooksNodes(self):
3389 """Build hooks nodes.
3392 return ([], self.my_node_names)
3394 def Exec(self, feedback_fn):
3395 """Verify integrity of the node group, performing various tests on nodes.
3398 # This method has too many local variables. pylint: disable=R0914
3399 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3401 if not self.my_node_names:
3403 feedback_fn("* Empty node group, skipping verification")
3407 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3408 verbose = self.op.verbose
3409 self._feedback_fn = feedback_fn
3411 vg_name = self.cfg.GetVGName()
3412 drbd_helper = self.cfg.GetDRBDHelper()
3413 cluster = self.cfg.GetClusterInfo()
3414 hypervisors = cluster.enabled_hypervisors
3415 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3417 i_non_redundant = [] # Non redundant instances
3418 i_non_a_balanced = [] # Non auto-balanced instances
3419 i_offline = 0 # Count of offline instances
3420 n_offline = 0 # Count of offline nodes
3421 n_drained = 0 # Count of nodes being drained
3422 node_vol_should = {}
3424 # FIXME: verify OS list
3427 filemap = _ComputeAncillaryFiles(cluster, False)
3429 # do local checksums
3430 master_node = self.master_node = self.cfg.GetMasterNode()
3431 master_ip = self.cfg.GetMasterIP()
3433 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3436 if self.cfg.GetUseExternalMipScript():
3437 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3439 node_verify_param = {
3440 constants.NV_FILELIST:
3441 map(vcluster.MakeVirtualPath,
3442 utils.UniqueSequence(filename
3443 for files in filemap
3444 for filename in files)),
3445 constants.NV_NODELIST:
3446 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3447 self.all_node_info.values()),
3448 constants.NV_HYPERVISOR: hypervisors,
3449 constants.NV_HVPARAMS:
3450 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3451 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3452 for node in node_data_list
3453 if not node.offline],
3454 constants.NV_INSTANCELIST: hypervisors,
3455 constants.NV_VERSION: None,
3456 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3457 constants.NV_NODESETUP: None,
3458 constants.NV_TIME: None,
3459 constants.NV_MASTERIP: (master_node, master_ip),
3460 constants.NV_OSLIST: None,
3461 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3462 constants.NV_USERSCRIPTS: user_scripts,
3465 if vg_name is not None:
3466 node_verify_param[constants.NV_VGLIST] = None
3467 node_verify_param[constants.NV_LVLIST] = vg_name
3468 node_verify_param[constants.NV_PVLIST] = [vg_name]
3471 node_verify_param[constants.NV_DRBDLIST] = None
3472 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3474 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3475 # Load file storage paths only from master node
3476 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3479 # FIXME: this needs to be changed per node-group, not cluster-wide
3481 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3482 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3483 bridges.add(default_nicpp[constants.NIC_LINK])
3484 for instance in self.my_inst_info.values():
3485 for nic in instance.nics:
3486 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3487 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3488 bridges.add(full_nic[constants.NIC_LINK])
3491 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3493 # Build our expected cluster state
3494 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3496 vm_capable=node.vm_capable))
3497 for node in node_data_list)
3501 for node in self.all_node_info.values():
3502 path = _SupportsOob(self.cfg, node)
3503 if path and path not in oob_paths:
3504 oob_paths.append(path)
3507 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3509 for instance in self.my_inst_names:
3510 inst_config = self.my_inst_info[instance]
3511 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3514 for nname in inst_config.all_nodes:
3515 if nname not in node_image:
3516 gnode = self.NodeImage(name=nname)
3517 gnode.ghost = (nname not in self.all_node_info)
3518 node_image[nname] = gnode
3520 inst_config.MapLVsByNode(node_vol_should)
3522 pnode = inst_config.primary_node
3523 node_image[pnode].pinst.append(instance)
3525 for snode in inst_config.secondary_nodes:
3526 nimg = node_image[snode]
3527 nimg.sinst.append(instance)
3528 if pnode not in nimg.sbp:
3529 nimg.sbp[pnode] = []
3530 nimg.sbp[pnode].append(instance)
3532 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3533 # The value of exclusive_storage should be the same across the group, so if
3534 # it's True for at least one node, we act as if it were set for all the nodes
3535 self._exclusive_storage = compat.any(es_flags.values())
3536 if self._exclusive_storage:
3537 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3539 # At this point, we have the in-memory data structures complete,
3540 # except for the runtime information, which we'll gather next
3542 # Due to the way our RPC system works, exact response times cannot be
3543 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3544 # time before and after executing the request, we can at least have a time
3546 nvinfo_starttime = time.time()
3547 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3549 self.cfg.GetClusterName())
3550 nvinfo_endtime = time.time()
3552 if self.extra_lv_nodes and vg_name is not None:
3554 self.rpc.call_node_verify(self.extra_lv_nodes,
3555 {constants.NV_LVLIST: vg_name},
3556 self.cfg.GetClusterName())
3558 extra_lv_nvinfo = {}
3560 all_drbd_map = self.cfg.ComputeDRBDMap()
3562 feedback_fn("* Gathering disk information (%s nodes)" %
3563 len(self.my_node_names))
3564 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3567 feedback_fn("* Verifying configuration file consistency")
3569 # If not all nodes are being checked, we need to make sure the master node
3570 # and a non-checked vm_capable node are in the list.
3571 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3573 vf_nvinfo = all_nvinfo.copy()
3574 vf_node_info = list(self.my_node_info.values())
3575 additional_nodes = []
3576 if master_node not in self.my_node_info:
3577 additional_nodes.append(master_node)
3578 vf_node_info.append(self.all_node_info[master_node])
3579 # Add the first vm_capable node we find which is not included,
3580 # excluding the master node (which we already have)
3581 for node in absent_nodes:
3582 nodeinfo = self.all_node_info[node]
3583 if (nodeinfo.vm_capable and not nodeinfo.offline and
3584 node != master_node):
3585 additional_nodes.append(node)
3586 vf_node_info.append(self.all_node_info[node])
3588 key = constants.NV_FILELIST
3589 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3590 {key: node_verify_param[key]},
3591 self.cfg.GetClusterName()))
3593 vf_nvinfo = all_nvinfo
3594 vf_node_info = self.my_node_info.values()
3596 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3598 feedback_fn("* Verifying node status")
3602 for node_i in node_data_list:
3604 nimg = node_image[node]
3608 feedback_fn("* Skipping offline node %s" % (node,))
3612 if node == master_node:
3614 elif node_i.master_candidate:
3615 ntype = "master candidate"
3616 elif node_i.drained:
3622 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3624 msg = all_nvinfo[node].fail_msg
3625 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3628 nimg.rpc_fail = True
3631 nresult = all_nvinfo[node].payload
3633 nimg.call_ok = self._VerifyNode(node_i, nresult)
3634 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3635 self._VerifyNodeNetwork(node_i, nresult)
3636 self._VerifyNodeUserScripts(node_i, nresult)
3637 self._VerifyOob(node_i, nresult)
3638 self._VerifyFileStoragePaths(node_i, nresult,
3639 node == master_node)
3642 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3643 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3646 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3647 self._UpdateNodeInstances(node_i, nresult, nimg)
3648 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3649 self._UpdateNodeOS(node_i, nresult, nimg)
3651 if not nimg.os_fail:
3652 if refos_img is None:
3654 self._VerifyNodeOS(node_i, nimg, refos_img)
3655 self._VerifyNodeBridges(node_i, nresult, bridges)
3657 # Check whether all running instances are primary for the node. (This
3658 # can no longer be done from _VerifyInstance below, since some of the
3659 # wrong instances could be from other node groups.)
3660 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3662 for inst in non_primary_inst:
3663 test = inst in self.all_inst_info
3664 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3665 "instance should not run on node %s", node_i.name)
3666 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3667 "node is running unknown instance %s", inst)
3669 self._VerifyGroupLVM(node_image, vg_name)
3671 for node, result in extra_lv_nvinfo.items():
3672 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3673 node_image[node], vg_name)
3675 feedback_fn("* Verifying instance status")
3676 for instance in self.my_inst_names:
3678 feedback_fn("* Verifying instance %s" % instance)
3679 inst_config = self.my_inst_info[instance]
3680 self._VerifyInstance(instance, inst_config, node_image,
3683 # If the instance is non-redundant we cannot survive losing its primary
3684 # node, so we are not N+1 compliant.
3685 if inst_config.disk_template not in constants.DTS_MIRRORED:
3686 i_non_redundant.append(instance)
3688 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3689 i_non_a_balanced.append(instance)
3691 feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    node_vol_should = {}
3694 # We will get spurious "unknown volume" warnings if any node of this group
3695 # is secondary for an instance whose primary is in another group. To avoid
3696 # them, we find these instances and add their volumes to node_vol_should.
3697 for inst in self.all_inst_info.values():
3698 for secondary in inst.secondary_nodes:
3699 if (secondary in self.my_node_info
3700 and inst.name not in self.my_inst_info):
3701 inst.MapLVsByNode(node_vol_should)
3704 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3706 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3707 feedback_fn("* Verifying N+1 Memory redundancy")
3708 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3710 feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))
3715 if i_non_a_balanced:
3716 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3717 % len(i_non_a_balanced))
    if i_offline:
      feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
    if n_offline:
      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
    if n_drained:
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3730 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3731 """Analyze the post-hooks' result
3733 This method analyses the hook result, handles it, and sends some
3734 nicely-formatted feedback back to the user.
3736 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3737 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3738 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
3740 @param lu_result: previous Exec result
3741 @return: the new Exec result, based on the previous result
3745 # We only really run POST phase hooks, only for non-empty groups,
3746 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3751 # Used to change hooks' output to proper indentation
3752 feedback_fn("* Hooks Results")
3753 assert hooks_results, "invalid result from hooks"
3755 for node_name in hooks_results:
3756 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3759 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3760 "Communication failure in hooks execution: %s", msg)
3761 if res.offline or msg:
          # No need to investigate payload if node is offline or gave an
          # error message
          continue
3765 for script, hkr, output in res.payload:
3766 test = hkr == constants.HKR_FAIL
3767 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3768 "Script %s failed, output:", script)
3770 output = self._HOOKS_INDENT_RE.sub(" ", output)
3771 feedback_fn("%s" % output)
3777 class LUClusterVerifyDisks(NoHooksLU):
3778 """Verifies the cluster disks status.
3783 def ExpandNames(self):
3784 self.share_locks = _ShareAll()
3785 self.needed_locks = {
3786 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3789 def Exec(self, feedback_fn):
3790 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3792 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3793 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3794 for group in group_names])
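# Illustrative sketch (not executed; hypothetical group names): with two node
# groups "default" and "storage", the Exec method above returns the
# equivalent of
#
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])
#
# i.e. one single-opcode job per group, which the master processor then
# submits as separate jobs that can run concurrently, subject to locking.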
3797 class LUGroupVerifyDisks(NoHooksLU):
3798 """Verifies the status of all disks in a node group.
3803 def ExpandNames(self):
3804 # Raises errors.OpPrereqError on its own if group can't be found
3805 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3807 self.share_locks = _ShareAll()
3808 self.needed_locks = {
3809 locking.LEVEL_INSTANCE: [],
3810 locking.LEVEL_NODEGROUP: [],
3811 locking.LEVEL_NODE: [],
      # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3814 # starts one instance of this opcode for every group, which means all
3815 # nodes will be locked for a short amount of time, so it's better to
3816 # acquire the node allocation lock as well.
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
3820 def DeclareLocks(self, level):
3821 if level == locking.LEVEL_INSTANCE:
3822 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3824 # Lock instances optimistically, needs verification once node and group
3825 # locks have been acquired
3826 self.needed_locks[locking.LEVEL_INSTANCE] = \
3827 self.cfg.GetNodeGroupInstances(self.group_uuid)
3829 elif level == locking.LEVEL_NODEGROUP:
3830 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3832 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3833 set([self.group_uuid] +
3834 # Lock all groups used by instances optimistically; this requires
3835 # going via the node before it's locked, requiring verification
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3839 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3841 elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which
      # contain actual instances
3844 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3845 self._LockInstancesNodes()
3847 # Lock all nodes in group to be verified
3848 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3849 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3850 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3852 def CheckPrereq(self):
3853 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3854 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3855 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3857 assert self.group_uuid in owned_groups
3859 # Check if locked instances are still correct
3860 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3862 # Get instance information
3863 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3865 # Check if node groups for locked instances are still correct
3866 _CheckInstancesNodeGroups(self.cfg, self.instances,
3867 owned_groups, owned_nodes, self.group_uuid)
3869 def Exec(self, feedback_fn):
3870 """Verify integrity of cluster disks.
3872 @rtype: tuple of three items
3873 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3882 nv_dict = _MapInstanceDisksToNodes(
3883 [inst for inst in self.instances.values()
3884 if inst.admin_state == constants.ADMINST_UP])
3887 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3888 set(self.cfg.GetVmCapableNodeList()))
3890 node_lvs = self.rpc.call_lv_list(nodes, [])
3892 for (node, node_res) in node_lvs.items():
3893 if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3902 for lv_name, (_, _, lv_online) in node_res.payload.items():
3903 inst = nv_dict.pop((node, lv_name), None)
3904 if not (lv_online or inst is None):
3905 res_instances.add(inst)
3907 # any leftover items in nv_dict are missing LVs, let's arrange the data
3909 for key, inst in nv_dict.iteritems():
3910 res_missing.setdefault(inst, []).append(list(key))
3912 return (res_nodes, list(res_instances), res_missing)
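# Illustrative sketch of the result shape (hypothetical names): a node whose
# LV enumeration failed, an instance that needs "activate-disks" because one
# of its LVs is offline, and an instance with a missing LV:
#
#   ({"node1": "Error while calling lv_list: ..."},
#    ["instance1"],
#    {"instance2": [["node2", "xenvg/disk0"]]})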
3915 class LUClusterRepairDiskSizes(NoHooksLU):
3916 """Verifies the cluster disks sizes.
3921 def ExpandNames(self):
3922 if self.op.instances:
3923 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3924 # Not getting the node allocation lock as only a specific set of
3925 # instances (and their nodes) is going to be acquired
3926 self.needed_locks = {
3927 locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
3930 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
3933 self.needed_locks = {
3934 locking.LEVEL_NODE_RES: locking.ALL_SET,
3935 locking.LEVEL_INSTANCE: locking.ALL_SET,
        # This opcode acquires the node locks for all instances
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
3941 self.share_locks = {
3942 locking.LEVEL_NODE_RES: 1,
3943 locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_ALLOC: 1,
      }
3947 def DeclareLocks(self, level):
3948 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3949 self._LockInstancesNodes(primary_only=True, level=level)
3951 def CheckPrereq(self):
3952 """Check prerequisites.
3954 This only checks the optional instance list against the existing names.
3957 if self.wanted_names is None:
3958 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3960 self.wanted_instances = \
3961 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3963 def _EnsureChildSizes(self, disk):
3964 """Ensure children of the disk have the needed disk size.
3966 This is valid mainly for DRBD8 and fixes an issue where the
3967 children have smaller disk size.
3969 @param disk: an L{ganeti.objects.Disk} object
3972 if disk.dev_type == constants.LD_DRBD8:
3973 assert disk.children, "Empty children for DRBD8?"
3974 fchild = disk.children[0]
3975 mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size
3981 # and we recurse on this child only, not on the metadev
3982 return self._EnsureChildSizes(fchild) or mismatch
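  # Illustrative sketch (hypothetical sizes, in MiB): for a DRBD8 disk of
  # size 10240 whose data child was recorded at 10176, the call grows the
  # child record and reports that a change was made:
  #
  #   changed = self._EnsureChildSizes(drbd_disk)
  #   # fchild.size: 10176 -> 10240, changed == True
  #
  # Note that the recursion descends only into children[0], the data child;
  # the DRBD metadata device is deliberately left untouched.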
3986 def Exec(self, feedback_fn):
3987 """Verify the size of cluster disks.
3990 # TODO: check child disks too
3991 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3994 pnode = instance.primary_node
3995 if pnode not in per_node_disks:
3996 per_node_disks[pnode] = []
3997 for idx, disk in enumerate(instance.disks):
3998 per_node_disks[pnode].append((instance, idx, disk))
4000 assert not (frozenset(per_node_disks.keys()) -
4001 self.owned_locks(locking.LEVEL_NODE_RES)), \
4002 "Not owning correct locks"
4003 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
4007 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
4010 result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
4015 if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
4021 for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
4031 if size != disk.size:
4032 self.LogInfo("Disk %d of instance %s has mismatched size,"
4033 " correcting: recorded %d, actual %d", idx,
4034 instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
4037 changed.append((instance.name, idx, size))
4038 if self._EnsureChildSizes(disk):
4039 self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
4044 class LUClusterRename(LogicalUnit):
4045 """Rename the cluster.
4048 HPATH = "cluster-rename"
4049 HTYPE = constants.HTYPE_CLUSTER
4051 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
4060 def BuildHooksNodes(self):
4061 """Build hooks nodes.
4064 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4066 def CheckPrereq(self):
4067 """Verify that the passed name is a valid one.
4070 hostname = netutils.GetHostname(name=self.op.name,
4071 family=self.cfg.GetPrimaryIPFamily())
4073 new_name = hostname.name
4074 self.ip = new_ip = hostname.ip
4075 old_name = self.cfg.GetClusterName()
4076 old_ip = self.cfg.GetMasterIP()
4077 if new_name == old_name and new_ip == old_ip:
4078 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4079 " cluster has changed",
4081 if new_ip != old_ip:
4082 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4083 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4084 " reachable on the network" %
4085 new_ip, errors.ECODE_NOTUNIQUE)
4087 self.op.name = new_name
4089 def Exec(self, feedback_fn):
4090 """Rename the cluster.
    clustername = self.op.name
    new_ip = self.ip
4096 # shutdown the master IP
4097 master_params = self.cfg.GetMasterNetworkParameters()
4098 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")
4104 cluster = self.cfg.GetClusterInfo()
4105 cluster.cluster_name = clustername
4106 cluster.master_ip = new_ip
4107 self.cfg.Update(cluster, feedback_fn)
4109 # update the known hosts file
4110 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4111 node_list = self.cfg.GetOnlineNodeList()
    try:
      node_list.remove(master_params.name)
    except ValueError:
      pass
4116 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4118 master_params.ip = new_ip
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
4121 msg = result.fail_msg
    if msg:
      self.LogWarning("Could not re-enable the master role on"
                      " the master, please restart manually: %s", msg)

    return clustername
4129 def _ValidateNetmask(cfg, netmask):
4130 """Checks if a netmask is valid.
4132 @type cfg: L{config.ConfigWriter}
4133 @param cfg: The cluster configuration
4135 @param netmask: the netmask to be verified
4136 @raise errors.OpPrereqError: if the validation fails
4139 ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4142 except errors.ProgrammerError:
4143 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4144 ip_family, errors.ECODE_INVAL)
4145 if not ipcls.ValidateNetmask(netmask):
4146 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4147 (netmask), errors.ECODE_INVAL)
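# Illustrative usage (assuming an IPv4 cluster, where the master netmask is
# given as a CIDR prefix length):
#
#   _ValidateNetmask(cfg, 24)   # acceptable, returns None
#   _ValidateNetmask(cfg, 33)   # raises errors.OpPrereqError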
4150 class LUClusterSetParams(LogicalUnit):
4151 """Change the parameters of the cluster.
4154 HPATH = "cluster-modify"
4155 HTYPE = constants.HTYPE_CLUSTER
4158 def CheckArguments(self):
4162 if self.op.uid_pool:
4163 uidpool.CheckUidPool(self.op.uid_pool)
4165 if self.op.add_uids:
4166 uidpool.CheckUidPool(self.op.add_uids)
4168 if self.op.remove_uids:
4169 uidpool.CheckUidPool(self.op.remove_uids)
4171 if self.op.master_netmask is not None:
4172 _ValidateNetmask(self.cfg, self.op.master_netmask)
4174 if self.op.diskparams:
4175 for dt_params in self.op.diskparams.values():
4176 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4179 except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s"
                                   % err, errors.ECODE_INVAL)
4183 def ExpandNames(self):
4184 # FIXME: in the future maybe other cluster params won't require checking on
4185 # all nodes to be modified.
4186 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4187 # resource locks the right thing, shouldn't it be the BGL instead?
4188 self.needed_locks = {
4189 locking.LEVEL_NODE: locking.ALL_SET,
4190 locking.LEVEL_INSTANCE: locking.ALL_SET,
4191 locking.LEVEL_NODEGROUP: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
4194 self.share_locks = _ShareAll()
4196 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
4205 def BuildHooksNodes(self):
4206 """Build hooks nodes.
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
4212 def CheckPrereq(self):
4213 """Check prerequisites.
4215 This checks whether the given params don't conflict and
4216 if the given volume group is valid.
4219 if self.op.vg_name is not None and not self.op.vg_name:
4220 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4221 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4222 " instances exist", errors.ECODE_INVAL)
4224 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4225 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4226 raise errors.OpPrereqError("Cannot disable drbd helper while"
4227 " drbd-based instances exist",
4230 node_list = self.owned_locks(locking.LEVEL_NODE)
4232 # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
4235 for node in node_list:
4236 msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
4249 if self.op.drbd_helper:
4250 # checks given drbd helper on all nodes
4251 helpers = self.rpc.call_drbd_helper(node_list)
4252 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
4256 msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
4261 node_helper = helpers[node].payload
4262 if node_helper != self.op.drbd_helper:
4263 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4264 (node, node_helper), errors.ECODE_ENVIRON)
4266 self.cluster = cluster = self.cfg.GetClusterInfo()
4267 # validate params changes
4268 if self.op.beparams:
4269 objects.UpgradeBeParams(self.op.beparams)
4270 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4271 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4273 if self.op.ndparams:
4274 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4275 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4277 # TODO: we need a more general way to handle resetting
4278 # cluster-level parameters to default values
4279 if self.new_ndparams["oob_program"] == "":
4280 self.new_ndparams["oob_program"] = \
4281 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4283 if self.op.hv_state:
4284 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4285 self.cluster.hv_state_static)
4286 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4287 for hv, values in new_hv_state.items())
4289 if self.op.disk_state:
4290 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4291 self.cluster.disk_state_static)
4292 self.new_disk_state = \
4293 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4294 for name, values in svalues.items()))
4295 for storage, svalues in new_disk_state.items())
    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)
      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
4303 for group in self.cfg.GetAllNodeGroupsInfo().values():
4304 instances = frozenset([inst for inst in all_instances
4305 if compat.any(node in group.members
4306 for node in inst.all_nodes)])
4307 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4308 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4309 new = _ComputeNewInstanceViolations(ipol,
4310 new_ipolicy, instances, self.cfg)
4312 violations.update(new)
      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))
4319 if self.op.nicparams:
4320 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4321 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4322 objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
4326 for instance in self.cfg.GetAllInstancesInfo().values():
4327 for nic_idx, nic in enumerate(instance.nics):
4328 params_copy = copy.deepcopy(nic.nicparams)
4329 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4331 # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
4334 except errors.ConfigurationError, err:
4335 nic_errors.append("Instance %s, nic/%d: %s" %
4336 (instance.name, nic_idx, err))
4338 # if we're moving instances to routed, check that they have an ip
4339 target_mode = params_filled[constants.NIC_MODE]
4340 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4341 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4342 " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors), errors.ECODE_INVAL)
4347 # hypervisor list/parameters
4348 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4349 if self.op.hvparams:
4350 for hv_name, hv_dict in self.op.hvparams.items():
4351 if hv_name not in self.new_hvparams:
4352 self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
4356 # disk template parameters
4357 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4358 if self.op.diskparams:
4359 for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
4365 # os hypervisor parameters
4366 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
4369 if os_name not in self.new_os_hvp:
4370 self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_dict is None:
              # Delete if it exists
              self.new_os_hvp[os_name].pop(hv_name, None)
4376 elif hv_name not in self.new_os_hvp[os_name]:
4377 self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
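    # Illustrative sketch of the os_hvp merge above (hypothetical values):
    # an existing hypervisor entry is updated key by key, and an entry whose
    # new value is None is dropped entirely:
    #
    #   cluster.os_hvp  == {"debian": {"xen-pvm": {"kernel_path": "/boot/k"}}}
    #   self.op.os_hvp  == {"debian": {"xen-pvm": None, "kvm": {"acpi": True}}}
    #   self.new_os_hvp == {"debian": {"kvm": {"acpi": True}}}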
4382 self.new_osp = objects.FillDict(cluster.osparams, {})
4383 if self.op.osparams:
4384 for os_name, osp in self.op.osparams.items():
4385 if os_name not in self.new_osp:
4386 self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)
4391 if not self.new_osp[os_name]:
4392 # we removed all parameters
4393 del self.new_osp[os_name]
4395 # check the parameter validity (remote check)
4396 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4397 os_name, self.new_osp[os_name])
4399 # changes to the hypervisor list
4400 if self.op.enabled_hypervisors is not None:
4401 self.hv_list = self.op.enabled_hypervisors
4402 for hv in self.hv_list:
4403 # if the hypervisor doesn't already exist in the cluster
4404 # hvparams, we initialize it to empty, and then (in both
4405 # cases) we make sure to fill the defaults, as we might not
4406 # have a complete defaults list if the hypervisor wasn't
4408 if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4411 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
4415 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4416 # either the enabled list has changed, or the parameters have, validate
4417 for hv_name, hv_params in self.new_hvparams.items():
4418 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4419 (self.op.enabled_hypervisors and
4420 hv_name in self.op.enabled_hypervisors)):
4421 # either this is a new hypervisor, or its parameters have changed
4422 hv_class = hypervisor.GetHypervisorClass(hv_name)
4423 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4424 hv_class.CheckParameterSyntax(hv_params)
4425 _CheckHVParams(self, node_list, hv_name, hv_params)
4427 self._CheckDiskTemplateConsistency()
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
4431 # defaults have already been checked in the above code-block
4432 for os_name, os_hvp in self.new_os_hvp.items():
4433 for hv_name, hv_params in os_hvp.items():
4434 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4435 # we need to fill in the new os_hvp on top of the actual hv_p
4436 cluster_defaults = self.new_hvparams.get(hv_name, {})
4437 new_osp = objects.FillDict(cluster_defaults, hv_params)
4438 hv_class = hypervisor.GetHypervisorClass(hv_name)
4439 hv_class.CheckParameterSyntax(new_osp)
4440 _CheckHVParams(self, node_list, hv_name, new_osp)
4442 if self.op.default_iallocator:
4443 alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.X_OK)
4446 if alloc_script is None:
4447 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)
4451 def _CheckDiskTemplateConsistency(self):
4452 """Check whether the disk templates that are going to be disabled
4453 are still in use by some instances.
4456 if self.op.enabled_disk_templates:
4457 cluster = self.cfg.GetClusterInfo()
4458 instances = self.cfg.GetAllInstancesInfo()
4460 disk_templates_to_remove = set(cluster.enabled_disk_templates) \
4461 - set(self.op.enabled_disk_templates)
4462 for instance in instances.itervalues():
4463 if instance.disk_template in disk_templates_to_remove:
4464 raise errors.OpPrereqError("Cannot disable disk template '%s',"
4465 " because instance '%s' is using it." %
4466 (instance.disk_template, instance.name))
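  # Illustrative sketch of the check above (hypothetical cluster state):
  #
  #   cluster.enabled_disk_templates == ["drbd", "plain"]
  #   self.op.enabled_disk_templates == ["plain"]
  #   disk_templates_to_remove       == set(["drbd"])
  #   # -> OpPrereqError if any instance still has disk_template == "drbd"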
4468 def Exec(self, feedback_fn):
4469 """Change the parameters of the cluster.
4472 if self.op.vg_name is not None:
4473 new_volume = self.op.vg_name
4476 if new_volume != self.cfg.GetVGName():
4477 self.cfg.SetVGName(new_volume)
4479 feedback_fn("Cluster LVM configuration already in desired"
4480 " state, not changing")
4481 if self.op.drbd_helper is not None:
4482 new_helper = self.op.drbd_helper
4485 if new_helper != self.cfg.GetDRBDHelper():
4486 self.cfg.SetDRBDHelper(new_helper)
4488 feedback_fn("Cluster DRBD helper already in desired state,"
4490 if self.op.hvparams:
4491 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
4494 if self.op.enabled_hypervisors is not None:
4495 self.cluster.hvparams = self.new_hvparams
4496 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4497 if self.op.enabled_disk_templates:
4498 self.cluster.enabled_disk_templates = \
4499 list(set(self.op.enabled_disk_templates))
4500 if self.op.beparams:
4501 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4502 if self.op.nicparams:
4503 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
4506 if self.op.osparams:
4507 self.cluster.osparams = self.new_osp
4508 if self.op.ndparams:
4509 self.cluster.ndparams = self.new_ndparams
4510 if self.op.diskparams:
4511 self.cluster.diskparams = self.new_diskparams
4512 if self.op.hv_state:
4513 self.cluster.hv_state_static = self.new_hv_state
4514 if self.op.disk_state:
4515 self.cluster.disk_state_static = self.new_disk_state
4517 if self.op.candidate_pool_size is not None:
4518 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4519 # we need to update the pool size here, otherwise the save will fail
4520 _AdjustCandidatePool(self, [])
4522 if self.op.maintain_node_health is not None:
4523 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4524 feedback_fn("Note: CONFD was disabled at build time, node health"
4525 " maintenance is not useful (still enabling it)")
4526 self.cluster.maintain_node_health = self.op.maintain_node_health
4528 if self.op.prealloc_wipe_disks is not None:
4529 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4531 if self.op.add_uids is not None:
4532 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4534 if self.op.remove_uids is not None:
4535 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4537 if self.op.uid_pool is not None:
4538 self.cluster.uid_pool = self.op.uid_pool
4540 if self.op.default_iallocator is not None:
4541 self.cluster.default_iallocator = self.op.default_iallocator
4543 if self.op.reserved_lvs is not None:
4544 self.cluster.reserved_lvs = self.op.reserved_lvs
4546 if self.op.use_external_mip_script is not None:
4547 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4549 def helper_os(aname, mods, desc):
4551 lst = getattr(self.cluster, aname)
4552 for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
4566 if self.op.hidden_os:
4567 helper_os("hidden_os", self.op.hidden_os, "hidden")
4569 if self.op.blacklisted_os:
4570 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4572 if self.op.master_netdev:
4573 master_params = self.cfg.GetMasterNetworkParameters()
4574 ems = self.cfg.GetUseExternalMipScript()
4575 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4576 self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
4580 feedback_fn("Changing master_netdev from %s to %s" %
4581 (master_params.netdev, self.op.master_netdev))
4582 self.cluster.master_netdev = self.op.master_netdev
4584 if self.op.master_netmask:
4585 master_params = self.cfg.GetMasterNetworkParameters()
4586 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4587 result = self.rpc.call_node_change_master_netmask(master_params.name,
4588 master_params.netmask,
4589 self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)
4596 self.cluster.master_netmask = self.op.master_netmask
4598 self.cfg.Update(self.cluster, feedback_fn)
4600 if self.op.master_netdev:
4601 master_params = self.cfg.GetMasterNetworkParameters()
4602 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4603 self.op.master_netdev)
4604 ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4613 def _UploadHelper(lu, nodes, fname):
4614 """Helper for uploading a file and showing warnings.
4617 if os.path.exists(fname):
4618 result = lu.rpc.call_upload_file(nodes, fname)
4619 for to_node, to_result in result.items():
4620 msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.LogWarning(msg)
4627 def _ComputeAncillaryFiles(cluster, redist):
4628 """Compute files external to Ganeti which need to be consistent.
4630 @type redist: boolean
4631 @param redist: Whether to include files which need to be redistributed
  # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
4637 pathutils.CONFD_HMAC_KEY,
4638 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4639 pathutils.SPICE_CERT_FILE,
4640 pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])
  if redist:
    # we need to ship at least the RAPI certificate
4646 files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
4649 files_all.update(ssconf.SimpleStore().GetFileList())
4651 if cluster.modify_etc_hosts:
4652 files_all.add(pathutils.ETC_HOSTS)
4654 if cluster.use_external_mip_script:
4655 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4657 # Files which are optional, these must:
4658 # - be present in one other category as well
4659 # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])
4664 # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)
  # File storage
  if (not redist and
      (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4673 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4674 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4676 # Files which should only be on VM-capable nodes
  files_vm = set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in
4681 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
  files_opt |= set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in
4687 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4689 # Filenames in each category must be unique
4690 all_files_set = files_all | files_mc | files_vm
4691 assert (len(all_files_set) ==
4692 sum(map(len, [files_all, files_mc, files_vm]))), \
4693 "Found file listed in more than one file list"
4695 # Optional files must be present in one other category
4696 assert all_files_set.issuperset(files_opt), \
4697 "Optional file not in a different required list"
4699 # This one file should never ever be re-distributed via RPC
4700 assert not (redist and
4701 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4703 return (files_all, files_opt, files_mc, files_vm)
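# Illustrative sketch of the result shape (abridged):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, False)
#   # files_all: e.g. pathutils.SSH_KNOWN_HOSTS_FILE, pathutils.CONFD_HMAC_KEY
#   # files_opt: e.g. pathutils.RAPI_USERS_FILE (also present in files_all)
#   # files_mc:  e.g. pathutils.CLUSTER_CONF_FILE (only when redist is False)
#   # files_vm:  hypervisor-specific ancillary files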
4706 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4707 """Distribute additional files which are part of the cluster configuration.
4709 ConfigWriter takes care of distributing the config and ssconf files, but
4710 there are more files which should be distributed to all nodes. This function
4711 makes sure those are copied.
4713 @param lu: calling logical unit
4714 @param additional_nodes: list of nodes not in the config to distribute to
4715 @type additional_vm: boolean
4716 @param additional_vm: whether the additional nodes are vm-capable or not
4719 # Gather target nodes
4720 cluster = lu.cfg.GetClusterInfo()
4721 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4723 online_nodes = lu.cfg.GetOnlineNodeList()
4724 online_set = frozenset(online_nodes)
4725 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4727 if additional_nodes is not None:
4728 online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
4732 # Never distribute to master node
4733 for nodelist in [online_nodes, vm_nodes]:
4734 if master_info.name in nodelist:
4735 nodelist.remove(master_info.name)
4738 (files_all, _, files_mc, files_vm) = \
4739 _ComputeAncillaryFiles(cluster, True)
4741 # Never re-distribute configuration file from here
4742 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4743 pathutils.CLUSTER_CONF_FILE in files_vm)
4744 assert not files_mc, "Master candidates not handled in this function"
  filemap = [
    (online_nodes, files_all),
4748 (vm_nodes, files_vm),
4752 for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
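# Illustrative usage (hypothetical node name): an LU that just added a node
# not yet present in the configuration could push the ancillary files to it
# explicitly:
#
#   _RedistributeAncillaryFiles(lu, additional_nodes=["node9.example.com"],
#                               additional_vm=False)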
4757 class LUClusterRedistConf(NoHooksLU):
4758 """Force the redistribution of cluster configuration.
4760 This is a very simple LU.
4765 def ExpandNames(self):
4766 self.needed_locks = {
4767 locking.LEVEL_NODE: locking.ALL_SET,
4768 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4770 self.share_locks = _ShareAll()
4772 def Exec(self, feedback_fn):
4773 """Redistribute the configuration.
4776 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4777 _RedistributeAncillaryFiles(self)
4780 class LUClusterActivateMasterIp(NoHooksLU):
4781 """Activate the master IP on the master node.
4784 def Exec(self, feedback_fn):
4785 """Activate the master IP.
4788 master_params = self.cfg.GetMasterNetworkParameters()
4789 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
4792 result.Raise("Could not activate the master IP")
4795 class LUClusterDeactivateMasterIp(NoHooksLU):
4796 """Deactivate the master IP on the master node.
4799 def Exec(self, feedback_fn):
4800 """Deactivate the master IP.
4803 master_params = self.cfg.GetMasterNetworkParameters()
4804 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
4807 result.Raise("Could not deactivate the master IP")
4810 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4811 """Sleep and poll for an instance's disk to sync.
  if not instance.disks or (disks is not None and not disks):
    return True
4817 disks = _ExpandCheckDisks(instance, disks)
4820 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4822 node = instance.primary_node
  for dev in disks:
    lu.cfg.SetDiskID(dev, node)
4827 # TODO: Convert to utils.Retry
4830 degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
4835 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4836 msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      raise errors.RemoteError("Can't contact node %s for mirror data,"
                               " aborting." % node)
4845 rstats = rstats.payload
4847 for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue
4853 cumul_degraded = (cumul_degraded or
4854 (mstat.is_degraded and mstat.sync_percent is None))
4855 if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
4858 rem_time = ("%s remaining (estimated)" %
4859 utils.FormatSeconds(mstat.estimated_time))
4860 max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
4863 lu.LogInfo("- device %s: %5.2f%% done, %s",
4864 disks[i].iv_name, mstat.sync_percent, rem_time)
4866 # if we're done but degraded, let's do a few small retries, to
4867 # make sure we see a stable and not transient situation; therefore
4868 # we force restart of the loop
4869 if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue
    if done or oneshot:
      break

    time.sleep(min(60, max_time))
4881 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4883 return not cumul_degraded
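# Illustrative usage (the error handling is hypothetical, not part of this
# module): block until all of an instance's disks report a clean mirror
# state, with progress reported through the LU:
#
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
#
# With oneshot=True only the current status is polled instead of waiting for
# the synchronisation to finish.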
4886 def _BlockdevFind(lu, node, dev, instance):
4887 """Wrapper around call_blockdev_find to annotate diskparams.
4889 @param lu: A reference to the lu object
4890 @param node: The node to call out
4891 @param dev: The device to find
4892 @param instance: The instance object the device belongs to
4893 @returns The result of the rpc call
4896 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4897 return lu.rpc.call_blockdev_find(node, disk)
4900 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4901 """Wrapper around L{_CheckDiskConsistencyInner}.
4904 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
4911 """Check that mirrors are not degraded.
4913 @attention: The device has to be annotated already.
4915 The ldisk parameter, if True, will change the test from the
4916 is_degraded attribute (which represents overall non-ok status for
4917 the device(s)) to the ldisk (representing the local storage status).
4920 lu.cfg.SetDiskID(dev, node)
  result = True
  if on_primary or dev.AssembleOnSecondary():
4925 rstats = lu.rpc.call_blockdev_find(node, dev)
4926 msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
4930 elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded
4940 for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)

  return result
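# Illustrative usage (hypothetical variables): check that an instance's DRBD
# disk is consistent on its secondary node, testing the local-disk status
# rather than the overall mirror status:
#
#   ok = _CheckDiskConsistency(lu, instance, dev, secondary_node,
#                              on_primary=False, ldisk=True)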
4947 class LUOobCommand(NoHooksLU):
4948 """Logical unit for OOB handling.
4952 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4954 def ExpandNames(self):
4955 """Gather locks we need.
4958 if self.op.node_names:
4959 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4960 lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET
4964 self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }
4968 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4970 if not self.op.node_names:
4971 # Acquire node allocation lock only if all nodes are affected
4972 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4974 def CheckPrereq(self):
4975 """Check prerequisites.
4978 - the node exists in the configuration
4981 Any errors are signaled by raising errors.OpPrereqError.
4985 self.master_node = self.cfg.GetMasterNode()
4987 assert self.op.power_delay >= 0.0
4989 if self.op.node_names:
4990 if (self.op.command in self._SKIP_MASTER and
4991 self.master_node in self.op.node_names):
4992 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4993 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4995 if master_oob_handler:
4996 additional_text = ("run '%s %s %s' if you want to operate on the"
                           " master regardless") % (master_oob_handler,
                                                    self.op.command,
                                                    self.master_node)
      else:
5001 additional_text = "it does not support out-of-band operations"
5003 raise errors.OpPrereqError(("Operating on the master node %s is not"
5004 " allowed for %s; %s") %
5005 (self.master_node, self.op.command,
5006 additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
5009 if self.op.command in self._SKIP_MASTER:
5010 self.op.node_names.remove(self.master_node)
5012 if self.op.command in self._SKIP_MASTER:
5013 assert self.master_node not in self.op.node_names
    self.nodes = []
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
5020 self.nodes.append(node)
5022 if (not self.op.ignore_status and
5023 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
5024 raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                   " not marked offline") % node_name,
                                   errors.ECODE_STATE)
5028 def Exec(self, feedback_fn):
5029 """Execute OOB and return result if we expect any.
    master_node = self.master_node
    ret = []
5035 for idx, node in enumerate(utils.NiceSort(self.nodes,
5036 key=lambda node: node.name)):
5037 node_entry = [(constants.RS_NORMAL, node.name)]
5038 ret.append(node_entry)
5040 oob_program = _SupportsOob(self.cfg, node)
      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue
5046 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5047 self.op.command, oob_program, node.name)
5048 result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
5059 except errors.OpExecError, err:
5060 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5062 node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
5065 # For health we should log important events
5066 for item, status in result.payload:
5067 if status in [constants.OOB_STATUS_WARNING,
5068 constants.OOB_STATUS_CRITICAL]:
5069 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5070 item, node.name, status)
          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
5074 elif self.op.command == constants.OOB_POWER_OFF:
5075 node.powered = False
5076 elif self.op.command == constants.OOB_POWER_STATUS:
5077 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5078 if powered != node.powered:
            logging.warning(("Recorded power state (%s) of node '%s' does not"
                             " match actual power state (%s)"), node.powered,
                            node.name, powered)
5083 # For configuration changing commands we should update the node
5084 if self.op.command in (constants.OOB_POWER_ON,
5085 constants.OOB_POWER_OFF):
5086 self.cfg.Update(node, feedback_fn)
5088 node_entry.append((constants.RS_NORMAL, result.payload))
5090 if (self.op.command == constants.OOB_POWER_ON and
5091 idx < len(self.nodes) - 1):
        time.sleep(self.op.power_delay)

    return ret
5096 def _CheckPayload(self, result):
5097 """Checks if the payload is valid.
5099 @param result: RPC result
5100 @raises errors.OpExecError: If payload is not valid
    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
5105 if not isinstance(result.payload, list):
5106 errs.append("command 'health' is expected to return a list but got %s" %
5107 type(result.payload))
      else:
        for item, status in result.payload:
5110 if status not in constants.OOB_STATUSES:
5111 errs.append("health item '%s' has invalid status '%s'" %
5114 if self.op.command == constants.OOB_POWER_STATUS:
5115 if not isinstance(result.payload, dict):
5116 errs.append("power-status is expected to return a dict but got %s" %
5117 type(result.payload))
5119 if self.op.command in [
5120 constants.OOB_POWER_ON,
5121 constants.OOB_POWER_OFF,
5122 constants.OOB_POWER_CYCLE,
5124 if result.payload is not None:
5125 errs.append("%s is expected to not return payload but got '%s'" %
5126 (self.op.command, result.payload))
    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s"
                               % utils.CommaJoin(errs))
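  # Illustrative sketch of the payload shapes accepted above (hypothetical
  # values):
  #
  #   "health":       [("disk0", constants.OOB_STATUS_OK),
  #                    ("psu1", constants.OOB_STATUS_WARNING)]
  #   "power-status": {constants.OOB_POWER_STATUS_POWERED: True}
  #   "power-on", "power-off", "power-cycle": no payload (None) expected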
5133 class _OsQuery(_QueryBase):
5134 FIELDS = query.OS_FIELDS
5136 def ExpandNames(self, lu):
5137 # Lock all nodes in shared mode
5138 # Temporary removal of locks, should be reverted later
5139 # TODO: reintroduce locks when they are lighter-weight
5140 lu.needed_locks = {}
5141 #self.share_locks[locking.LEVEL_NODE] = 1
5142 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5144 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
5150 self.do_locking = self.use_locking
5152 def DeclareLocks(self, lu, level):
5156 def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
5159 @param rlist: a map with node names as keys and OS objects as values
5162 @return: a dictionary with osnames as keys and as value another
5163 map, with nodes as keys and tuples of (path, status, diagnose,
5164 variants, parameters, api_versions) as values, eg::
5166 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5167 (/srv/..., False, "invalid api")],
5168 "node2": [(/srv/..., True, "", [], [])]}
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
5174 # level), so that nodes with a non-responding node daemon don't
5175 # make all OSes invalid
5176 good_nodes = [node_name for node_name in rlist
5177 if not rlist[node_name].fail_msg]
5178 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
5181 for (name, path, status, diagnose, variants,
5182 params, api_versions) in nr.payload:
5183 if name not in all_os:
5184 # build a list of nodes for this os containing empty lists
5185 # for each node in node_list
        all_os[name] = {}
        for nname in good_nodes:
5188 all_os[name][nname] = []
5189 # convert params from [name, help] to (name, help)
5190 params = [tuple(v) for v in params]
5191 all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os
5195 def _GetQueryData(self, lu):
5196 """Computes the list of nodes and their attributes.
5199 # Locking is not used
5200 assert not (compat.any(lu.glm.is_owned(level)
5201 for level in locking.LEVELS
5202 if level != locking.LEVEL_CLUSTER) or
5203 self.do_locking or self.use_locking)
5205 valid_nodes = [node.name
5206 for node in lu.cfg.GetAllNodesInfo().values()
5207 if not node.offline and node.vm_capable]
5208 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5209 cluster = lu.cfg.GetClusterInfo()
    data = {}

    for (os_name, os_data) in pol.items():
5214 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5215 hidden=(os_name in cluster.hidden_os),
5216 blacklisted=(os_name in cluster.blacklisted_os))
      variants = set()
      parameters = set()
      api_versions = set()
5222 for idx, osl in enumerate(os_data.values()):
5223 info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
5231 parameters.update(node_params)
5232 api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
5236 parameters.intersection_update(node_params)
5237 api_versions.intersection_update(node_api)
5239 info.variants = list(variants)
5240 info.parameters = list(parameters)
5241 info.api_versions = list(api_versions)
5243 data[os_name] = info
5245 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5250 class LUOsDiagnose(NoHooksLU):
5251 """Logical unit for OS diagnose/query.
5257 def _BuildFilter(fields, names):
5258 """Builds a filter for querying OSes.
5261 name_filter = qlang.MakeSimpleFilter("name", names)
5263 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5264 # respective field is not requested
5265 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5266 for fname in ["hidden", "blacklisted"]
5267 if fname not in fields]
5268 if "valid" not in fields:
5269 status_filter.append([qlang.OP_TRUE, "valid"])
    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None
5276 if name_filter and status_filter:
5277 return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
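  # Illustrative sketch (hypothetical query): when only the "name" field is
  # requested and no names are given, the generated filter is roughly
  #
  #   _BuildFilter(["name"], []) ==
  #     [qlang.OP_AND,
  #      [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #      [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #      [qlang.OP_TRUE, "valid"]]
  #
  # i.e. hidden, blacklisted and invalid OSes are filtered out unless those
  # fields are explicitly requested.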
5283 def CheckArguments(self):
5284 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5285 self.op.output_fields, False)
5287 def ExpandNames(self):
5288 self.oq.ExpandNames(self)
5290 def Exec(self, feedback_fn):
5291 return self.oq.OldStyleQuery(self)
5294 class _ExtStorageQuery(_QueryBase):
5295 FIELDS = query.EXTSTORAGE_FIELDS
5297 def ExpandNames(self, lu):
5298 # Lock all nodes in shared mode
5299 # Temporary removal of locks, should be reverted later
5300 # TODO: reintroduce locks when they are lighter-weight
5301 lu.needed_locks = {}
5302 #self.share_locks[locking.LEVEL_NODE] = 1
5303 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5305 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
5311 self.do_locking = self.use_locking
5313 def DeclareLocks(self, lu, level):
5317 def _DiagnoseByProvider(rlist):
    """Remaps a per-node return list into a per-provider per-node dictionary
5320 @param rlist: a map with node names as keys and ExtStorage objects as values
5323 @return: a dictionary with extstorage providers as keys and as
5324 value another map, with nodes as keys and tuples of
5325 (path, status, diagnose, parameters) as values, eg::
        {"provider1": {"node1": [(/usr/lib/..., True, "", [])],
                       "node2": [(/srv/..., False, "missing file")],
                       "node3": [(/srv/..., True, "", [])]
    all_es = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all providers invalid
5337 good_nodes = [node_name for node_name in rlist
5338 if not rlist[node_name].fail_msg]
5339 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
5342 for (name, path, status, diagnose, params) in nr.payload:
5343 if name not in all_es:
          # build a list of nodes for this provider containing empty lists
          # for each node in node_list
          all_es[name] = {}
          for nname in good_nodes:
5348 all_es[name][nname] = []
5349 # convert params from [name, help] to (name, help)
5350 params = [tuple(v) for v in params]
        all_es[name][node_name].append((path, status, diagnose, params))
    return all_es
5354 def _GetQueryData(self, lu):
5355 """Computes the list of nodes and their attributes.
5358 # Locking is not used
5359 assert not (compat.any(lu.glm.is_owned(level)
5360 for level in locking.LEVELS
5361 if level != locking.LEVEL_CLUSTER) or
5362 self.do_locking or self.use_locking)
5364 valid_nodes = [node.name
5365 for node in lu.cfg.GetAllNodesInfo().values()
5366 if not node.offline and node.vm_capable]
5367 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5371 nodegroup_list = lu.cfg.GetNodeGroupList()
    data = {}

    for (es_name, es_data) in pol.items():
5374 # For every provider compute the nodegroup validity.
5375 # To do this we need to check the validity of each node in es_data
5376 # and then construct the corresponding nodegroup dict:
5377 # { nodegroup1: status
5378 # nodegroup2: status
5381 for nodegroup in nodegroup_list:
5382 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5384 nodegroup_nodes = ndgrp.members
5385 nodegroup_name = ndgrp.name
        node_statuses = []
        for node in nodegroup_nodes:
5389 if node in valid_nodes:
5390 if es_data[node] != []:
5391 node_status = es_data[node][0][1]
5392 node_statuses.append(node_status)
            else:
              node_statuses.append(False)
5396 if False in node_statuses:
5397 ndgrp_data[nodegroup_name] = False
5399 ndgrp_data[nodegroup_name] = True
      # Compute the provider's parameters
      parameters = set()
5403 for idx, esl in enumerate(es_data.values()):
5404 valid = bool(esl and esl[0][1])
        if not valid:
          break

        node_params = esl[0][3]
        if idx == 0:
          # First entry
          parameters.update(node_params)
        else:
          # Filter out inconsistent values
          parameters.intersection_update(node_params)
5416 params = list(parameters)
5418 # Now fill all the info for this provider
5419 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
                                  nodegroup_status=ndgrp_data,
                                  parameters=params)
5423 data[es_name] = info
5425 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5430 class LUExtStorageDiagnose(NoHooksLU):
5431 """Logical unit for ExtStorage diagnose/query.
5436 def CheckArguments(self):
5437 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5438 self.op.output_fields, False)
5440 def ExpandNames(self):
5441 self.eq.ExpandNames(self)
5443 def Exec(self, feedback_fn):
5444 return self.eq.OldStyleQuery(self)
5447 class LUNodeRemove(LogicalUnit):
5448 """Logical unit for removing a node.
5451 HPATH = "node-remove"
5452 HTYPE = constants.HTYPE_NODE
5454 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
5463 def BuildHooksNodes(self):
5464 """Build hooks nodes.
5466 This doesn't run on the target node in the pre phase as a failed
5467 node would then be impossible to remove.
5470 all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
5475 return (all_nodes, all_nodes)
5477 def CheckPrereq(self):
5478 """Check prerequisites.
5481 - the node exists in the configuration
5482 - it does not have primary or secondary instances
5483 - it's not the master
5485 Any errors are signaled by raising errors.OpPrereqError.
5488 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5489 node = self.cfg.GetNodeInfo(self.op.node_name)
5490 assert node is not None
5492 masternode = self.cfg.GetMasterNode()
5493 if node.name == masternode:
5494 raise errors.OpPrereqError("Node is the master node, failover to another"
5495 " node is required", errors.ECODE_INVAL)
5497 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5498 if node.name in instance.all_nodes:
5499 raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
5502 self.op.node_name = node.name
5505 def Exec(self, feedback_fn):
5506 """Removes the node from the cluster.
5510 logging.info("Stopping the node daemon and removing configs from node %s",
5513 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"
5518 # Promote nodes to master candidate as needed
5519 _AdjustCandidatePool(self, exceptions=[node.name])
5520 self.context.RemoveNode(node.name)
5522 # Run post hooks on the node before it's removed
5523 _RunPostHook(self, node.name)
5525 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5526 msg = result.fail_msg
5528 self.LogWarning("Errors encountered on the remote node while leaving"
5529 " the cluster: %s", msg)
5531 # Remove node from our /etc/hosts
5532 if self.cfg.GetClusterInfo().modify_etc_hosts:
5533 master_node = self.cfg.GetMasterNode()
5534 result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
5537 result.Raise("Can't update hosts file with new host data")
5538 _RedistributeAncillaryFiles(self)
5541 class _NodeQuery(_QueryBase):
5542 FIELDS = query.NODE_FIELDS
5544 def ExpandNames(self, lu):
5545 lu.needed_locks = {}
5546 lu.share_locks = _ShareAll()
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
5553 self.do_locking = (self.use_locking and
5554 query.NQ_LIVE in self.requested_data)
    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
5558 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5559 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5561 def DeclareLocks(self, lu, level):
5564 def _GetQueryData(self, lu):
5565 """Computes the list of nodes and their attributes.
5568 all_info = lu.cfg.GetAllNodesInfo()
5570 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5572 # Gather data as requested
5573 if query.NQ_LIVE in self.requested_data:
5574 # filter out non-vm_capable nodes
5575 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5577 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5578 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5579 [lu.cfg.GetHypervisorType()], es_flags)
5580 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5581 for (name, nresult) in node_data.items()
5582 if not nresult.fail_msg and nresult.payload)
5586 if query.NQ_INST in self.requested_data:
5587 node_to_primary = dict([(name, set()) for name in nodenames])
5588 node_to_secondary = dict([(name, set()) for name in nodenames])
5590 inst_data = lu.cfg.GetAllInstancesInfo()
5592 for inst in inst_data.values():
5593 if inst.primary_node in node_to_primary:
5594 node_to_primary[inst.primary_node].add(inst.name)
5595 for secnode in inst.secondary_nodes:
5596 if secnode in node_to_secondary:
5597 node_to_secondary[secnode].add(inst.name)
5599 node_to_primary = None
5600 node_to_secondary = None
5602 if query.NQ_OOB in self.requested_data:
5603 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5604 for name, node in all_info.iteritems())
5608 if query.NQ_GROUP in self.requested_data:
5609 groups = lu.cfg.GetAllNodeGroupsInfo()
5613 return query.NodeQueryData([all_info[name] for name in nodenames],
5614 live_data, lu.cfg.GetMasterNode(),
5615 node_to_primary, node_to_secondary, groups,
5616 oob_support, lu.cfg.GetClusterInfo())
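# Editor's note -- a hedged sketch, not in the original module. The query
# classes only gather what the caller asked for: each NQ_* flag in
# self.requested_data gates one potentially expensive RPC or config scan,
# and ungathered fields simply stay None. Schematically, with plain strings
# standing in for the query.NQ_* constants:
def _DemoRequestedDataGating(requested_data):
  """requested_data: a set of flag strings, e.g. set(["live", "inst"])."""
  live_data = None
  node_to_primary = None
  if "live" in requested_data:
    live_data = {}        # the real code issues call_node_info here
  if "inst" in requested_data:
    node_to_primary = {}  # the real code scans GetAllInstancesInfo here
  return (live_data, node_to_primary)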
5619 class LUNodeQuery(NoHooksLU):
5620 """Logical unit for querying nodes.
5623 # pylint: disable=W0142
5626 def CheckArguments(self):
5627 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5628 self.op.output_fields, self.op.use_locking)
5630 def ExpandNames(self):
5631 self.nq.ExpandNames(self)
5633 def DeclareLocks(self, level):
5634 self.nq.DeclareLocks(self, level)
5636 def Exec(self, feedback_fn):
5637 return self.nq.OldStyleQuery(self)
5640 class LUNodeQueryvols(NoHooksLU):
5641 """Logical unit for getting volumes on node(s).
5645 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5646 _FIELDS_STATIC = utils.FieldSet("node")
5648 def CheckArguments(self):
5649 _CheckOutputFields(static=self._FIELDS_STATIC,
5650 dynamic=self._FIELDS_DYNAMIC,
5651 selected=self.op.output_fields)
5653 def ExpandNames(self):
5654 self.share_locks = _ShareAll()
5657 self.needed_locks = {
5658 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5661 self.needed_locks = {
5662 locking.LEVEL_NODE: locking.ALL_SET,
5663 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5666 def Exec(self, feedback_fn):
5667 """Computes the list of nodes and their attributes.
5670 nodenames = self.owned_locks(locking.LEVEL_NODE)
5671 volumes = self.rpc.call_node_volumes(nodenames)
5673 ilist = self.cfg.GetAllInstancesInfo()
5674 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5677 for node in nodenames:
5678 nresult = volumes[node]
5681 msg = nresult.fail_msg
5683 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5686 node_vols = sorted(nresult.payload,
5687 key=operator.itemgetter("dev"))
5689 for vol in node_vols:
5691 for field in self.op.output_fields:
5694 elif field == "phys":
5698 elif field == "name":
5700 elif field == "size":
5701 val = int(float(vol["size"]))
5702 elif field == "instance":
5703 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5705 raise errors.ParameterError(field)
5706 node_output.append(str(val))
5708 output.append(node_output)
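# Editor's note -- a hedged sketch, not in the original source. The
# "instance" column above is resolved through a reverse map keyed by
# (node, "vg/lv-name") pairs, as built by _MapInstanceDisksToNodes. A
# minimal model of the lookup, with hypothetical node and volume names:
def _DemoVolumeToInstanceLookup():
  vol2inst = {("node1.example.com", "xenvg/disk0"): "inst1.example.com"}
  key = ("node1.example.com", "xenvg" + "/" + "disk0")
  return vol2inst.get(key, "-")  # "-" for volumes not used by any instance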
5713 class LUNodeQueryStorage(NoHooksLU):
5714 """Logical unit for getting information on storage units on node(s).
5717 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5720 def CheckArguments(self):
5721 _CheckOutputFields(static=self._FIELDS_STATIC,
5722 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5723 selected=self.op.output_fields)
5725 def ExpandNames(self):
5726 self.share_locks = _ShareAll()
5729 self.needed_locks = {
5730 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5733 self.needed_locks = {
5734 locking.LEVEL_NODE: locking.ALL_SET,
5735 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5738 def Exec(self, feedback_fn):
5739 """Computes the list of nodes and their attributes.
5742 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5744 # Always get name to sort by
5745 if constants.SF_NAME in self.op.output_fields:
5746 fields = self.op.output_fields[:]
5748 fields = [constants.SF_NAME] + self.op.output_fields
5750 # Never ask for node or type as those are only known to the LU
5751 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5752 while extra in fields:
5753 fields.remove(extra)
5755 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5756 name_idx = field_idx[constants.SF_NAME]
5758 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5759 data = self.rpc.call_storage_list(self.nodes,
5760 self.op.storage_type, st_args,
5761 self.op.name, fields)
5765 for node in utils.NiceSort(self.nodes):
5766 nresult = data[node]
5770 msg = nresult.fail_msg
5772 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5775 rows = dict([(row[name_idx], row) for row in nresult.payload])
5777 for name in utils.NiceSort(rows.keys()):
5782 for field in self.op.output_fields:
5783 if field == constants.SF_NODE:
5785 elif field == constants.SF_TYPE:
5786 val = self.op.storage_type
5787 elif field in field_idx:
5788 val = row[field_idx[field]]
5790 raise errors.ParameterError(field)
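# Editor's illustration -- a hedged sketch, not part of the module. The
# storage query maps each requested field name to its column index in the
# rows returned by call_storage_list, while SF_NODE and SF_TYPE are
# synthesized locally because the backend does not know them:
def _DemoFieldIndexMapping():
  fields = ["name", "size", "used"]  # hypothetical backend columns
  field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
  row = ["lv0", "102400", "2048"]    # one hypothetical payload row
  return row[field_idx["size"]]      # -> "102400"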
5799 class _InstanceQuery(_QueryBase):
5800 FIELDS = query.INSTANCE_FIELDS
5802 def ExpandNames(self, lu):
5803 lu.needed_locks = {}
5804 lu.share_locks = _ShareAll()
5807 self.wanted = _GetWantedInstances(lu, self.names)
5809 self.wanted = locking.ALL_SET
5811 self.do_locking = (self.use_locking and
5812 query.IQ_LIVE in self.requested_data)
5814 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5815 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5816 lu.needed_locks[locking.LEVEL_NODE] = []
5817 lu.needed_locks[locking.LEVEL_NETWORK] = []
5818 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5820 self.do_grouplocks = (self.do_locking and
5821 query.IQ_NODES in self.requested_data)
5823 def DeclareLocks(self, lu, level):
5825 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5826 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5828 # Lock all groups used by instances optimistically; this requires going
5829 # via the node before it's locked, requiring verification later on
5830 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5832 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5833 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5834 elif level == locking.LEVEL_NODE:
5835 lu._LockInstancesNodes() # pylint: disable=W0212
5837 elif level == locking.LEVEL_NETWORK:
5838 lu.needed_locks[locking.LEVEL_NETWORK] = \
5840 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5841 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5844 def _CheckGroupLocks(lu):
5845 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5846 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5848 # Check if node groups for locked instances are still correct
5849 for instance_name in owned_instances:
5850 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5852 def _GetQueryData(self, lu):
5853 """Computes the list of instances and their attributes.
5856 if self.do_grouplocks:
5857 self._CheckGroupLocks(lu)
5859 cluster = lu.cfg.GetClusterInfo()
5860 all_info = lu.cfg.GetAllInstancesInfo()
5862 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5864 instance_list = [all_info[name] for name in instance_names]
5865 nodes = frozenset(itertools.chain(*(inst.all_nodes
5866 for inst in instance_list)))
5867 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5870 wrongnode_inst = set()
5872 # Gather data as requested
5873 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5875 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5877 result = node_data[name]
5879 # offline nodes will be in both lists
5880 assert result.fail_msg
5881 offline_nodes.append(name)
5883 bad_nodes.append(name)
5884 elif result.payload:
5885 for inst in result.payload:
5886 if inst in all_info:
5887 if all_info[inst].primary_node == name:
5888 live_data.update(result.payload)
5890 wrongnode_inst.add(inst)
5892 # orphan instance; we don't list it here as we don't
5893 # handle this case yet in the output of instance listing
5894 logging.warning("Orphan instance '%s' found on node %s",
5896 # else no instance is alive
5900 if query.IQ_DISKUSAGE in self.requested_data:
5901 gmi = ganeti.masterd.instance
5902 disk_usage = dict((inst.name,
5903 gmi.ComputeDiskSize(inst.disk_template,
5904 [{constants.IDISK_SIZE: disk.size}
5905 for disk in inst.disks]))
5906 for inst in instance_list)
5910 if query.IQ_CONSOLE in self.requested_data:
5912 for inst in instance_list:
5913 if inst.name in live_data:
5914 # Instance is running
5915 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5917 consinfo[inst.name] = None
5918 assert set(consinfo.keys()) == set(instance_names)
5922 if query.IQ_NODES in self.requested_data:
5923 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5925 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5926 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5927 for uuid in set(map(operator.attrgetter("group"),
5933 if query.IQ_NETWORKS in self.requested_data:
5934 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5935 for i in instance_list))
5936 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5940 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5941 disk_usage, offline_nodes, bad_nodes,
5942 live_data, wrongnode_inst, consinfo,
5943 nodes, groups, networks)
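# Editor's note -- a hedged sketch, not in the original module. The
# "optimistic" group locking above works in two steps: the group locks are
# computed from the config before they are actually held, and
# _CheckGroupLocks later verifies that no instance changed groups in the
# meantime. A schematic of the verification step:
def _DemoOptimisticLockCheck(owned_groups, current_groups):
  """Both arguments are sets of node group UUIDs."""
  missing = current_groups - owned_groups
  if missing:
    # the real code raises errors.OpPrereqError so the job can be retried
    raise RuntimeError("node groups changed since locks were acquired: %s"
                       % ", ".join(sorted(missing)))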
5946 class LUQuery(NoHooksLU):
5947 """Query for resources/items of a certain kind.
5950 # pylint: disable=W0142
5953 def CheckArguments(self):
5954 qcls = _GetQueryImplementation(self.op.what)
5956 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5958 def ExpandNames(self):
5959 self.impl.ExpandNames(self)
5961 def DeclareLocks(self, level):
5962 self.impl.DeclareLocks(self, level)
5964 def Exec(self, feedback_fn):
5965 return self.impl.NewStyleQuery(self)
5968 class LUQueryFields(NoHooksLU):
5969 """Query for resources/items of a certain kind.
5972 # pylint: disable=W0142
5975 def CheckArguments(self):
5976 self.qcls = _GetQueryImplementation(self.op.what)
5978 def ExpandNames(self):
5979 self.needed_locks = {}
5981 def Exec(self, feedback_fn):
5982 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5985 class LUNodeModifyStorage(NoHooksLU):
5986 """Logical unit for modifying a storage volume on a node.
5991 def CheckArguments(self):
5992 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5994 storage_type = self.op.storage_type
5997 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5999 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
6000 " modified" % storage_type,
6003 diff = set(self.op.changes.keys()) - modifiable
6005 raise errors.OpPrereqError("The following fields cannot be modified for"
6006 " storage units of type '%s': %r" %
6007 (storage_type, list(diff)),
6010 def ExpandNames(self):
6011 self.needed_locks = {
6012 locking.LEVEL_NODE: self.op.node_name,
6015 def Exec(self, feedback_fn):
6016 """Computes the list of nodes and their attributes.
6019 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
6020 result = self.rpc.call_storage_modify(self.op.node_name,
6021 self.op.storage_type, st_args,
6022 self.op.name, self.op.changes)
6023 result.Raise("Failed to modify storage unit '%s' on %s" %
6024 (self.op.name, self.op.node_name))
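# Editor's illustration -- a hedged sketch, not part of the source. The
# CheckArguments validation above is a simple set difference against the
# per-storage-type whitelist in constants.MODIFIABLE_STORAGE_FIELDS:
def _DemoModifiableFieldCheck(changes, modifiable):
  """changes: dict of requested changes; modifiable: set of allowed keys."""
  diff = set(changes.keys()) - modifiable
  if diff:
    raise ValueError("fields not modifiable: %r" % sorted(diff))
# e.g. _DemoModifiableFieldCheck({"allocatable": True}, set(["allocatable"]))
# passes, while any unknown key raises.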
6027 class LUNodeAdd(LogicalUnit):
6028 """Logical unit for adding node to the cluster.
6032 HTYPE = constants.HTYPE_NODE
6033 _NFLAGS = ["master_capable", "vm_capable"]
6035 def CheckArguments(self):
6036 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
6037 # validate/normalize the node name
6038 self.hostname = netutils.GetHostname(name=self.op.node_name,
6039 family=self.primary_ip_family)
6040 self.op.node_name = self.hostname.name
6042 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
6043 raise errors.OpPrereqError("Cannot readd the master node",
6046 if self.op.readd and self.op.group:
6047 raise errors.OpPrereqError("Cannot pass a node group when a node is"
6048 " being readded", errors.ECODE_INVAL)
6050 def BuildHooksEnv(self):
6053 This will run on all nodes before, and on all nodes + the new node after.
6057 "OP_TARGET": self.op.node_name,
6058 "NODE_NAME": self.op.node_name,
6059 "NODE_PIP": self.op.primary_ip,
6060 "NODE_SIP": self.op.secondary_ip,
6061 "MASTER_CAPABLE": str(self.op.master_capable),
6062 "VM_CAPABLE": str(self.op.vm_capable),
6065 def BuildHooksNodes(self):
6066 """Build hooks nodes.
6069 # Exclude added node
6070 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6071 post_nodes = pre_nodes + [self.op.node_name, ]
6073 return (pre_nodes, post_nodes)
6075 def CheckPrereq(self):
6076 """Check prerequisites.
6079 - the new node is not already in the config
6081 - its parameters (single/dual homed) matches the cluster
6083 Any errors are signaled by raising errors.OpPrereqError.
6087 hostname = self.hostname
6088 node = hostname.name
6089 primary_ip = self.op.primary_ip = hostname.ip
6090 if self.op.secondary_ip is None:
6091 if self.primary_ip_family == netutils.IP6Address.family:
6092 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
6093 " IPv4 address must be given as secondary",
6095 self.op.secondary_ip = primary_ip
6097 secondary_ip = self.op.secondary_ip
6098 if not netutils.IP4Address.IsValid(secondary_ip):
6099 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6100 " address" % secondary_ip, errors.ECODE_INVAL)
6102 node_list = cfg.GetNodeList()
6103 if not self.op.readd and node in node_list:
6104 raise errors.OpPrereqError("Node %s is already in the configuration" %
6105 node, errors.ECODE_EXISTS)
6106 elif self.op.readd and node not in node_list:
6107 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6110 self.changed_primary_ip = False
6112 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6113 if self.op.readd and node == existing_node_name:
6114 if existing_node.secondary_ip != secondary_ip:
6115 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6116 " address configuration as before",
6118 if existing_node.primary_ip != primary_ip:
6119 self.changed_primary_ip = True
6123 if (existing_node.primary_ip == primary_ip or
6124 existing_node.secondary_ip == primary_ip or
6125 existing_node.primary_ip == secondary_ip or
6126 existing_node.secondary_ip == secondary_ip):
6127 raise errors.OpPrereqError("New node ip address(es) conflict with"
6128 " existing node %s" % existing_node.name,
6129 errors.ECODE_NOTUNIQUE)
6131 # After this 'if' block, None is no longer a valid value for the
6132 # _capable op attributes
6134 old_node = self.cfg.GetNodeInfo(node)
6135 assert old_node is not None, "Can't retrieve locked node %s" % node
6136 for attr in self._NFLAGS:
6137 if getattr(self.op, attr) is None:
6138 setattr(self.op, attr, getattr(old_node, attr))
6140 for attr in self._NFLAGS:
6141 if getattr(self.op, attr) is None:
6142 setattr(self.op, attr, True)
6144 if self.op.readd and not self.op.vm_capable:
6145 pri, sec = cfg.GetNodeInstances(node)
6147 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6148 " flag set to false, but it already holds"
6149 " instances" % node,
6152 # check that the type of the node (single versus dual homed) is the
6153 # same as for the master
6154 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6155 master_singlehomed = myself.secondary_ip == myself.primary_ip
6156 newbie_singlehomed = secondary_ip == primary_ip
6157 if master_singlehomed != newbie_singlehomed:
6158 if master_singlehomed:
6159 raise errors.OpPrereqError("The master has no secondary ip but the"
6160 " new node has one",
6163 raise errors.OpPrereqError("The master has a secondary ip but the"
6164 " new node doesn't have one",
6167 # checks reachability
6168 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6169 raise errors.OpPrereqError("Node not reachable by ping",
6170 errors.ECODE_ENVIRON)
6172 if not newbie_singlehomed:
6173 # check reachability from my secondary ip to newbie's secondary ip
6174 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6175 source=myself.secondary_ip):
6176 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6177 " based ping to node daemon port",
6178 errors.ECODE_ENVIRON)
6185 if self.op.master_capable:
6186 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6188 self.master_candidate = False
6191 self.new_node = old_node
6193 node_group = cfg.LookupNodeGroup(self.op.group)
6194 self.new_node = objects.Node(name=node,
6195 primary_ip=primary_ip,
6196 secondary_ip=secondary_ip,
6197 master_candidate=self.master_candidate,
6198 offline=False, drained=False,
6199 group=node_group, ndparams={})
6201 if self.op.ndparams:
6202 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6203 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6204 "node", "cluster or group")
6206 if self.op.hv_state:
6207 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6209 if self.op.disk_state:
6210 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6212 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6213 # it a property on the base class.
6214 rpcrunner = rpc.DnsOnlyRunner()
6215 result = rpcrunner.call_version([node])[node]
6216 result.Raise("Can't get version information from node %s" % node)
6217 if constants.PROTOCOL_VERSION == result.payload:
6218 logging.info("Communication to node %s fine, sw version %s match",
6219 node, result.payload)
6221 raise errors.OpPrereqError("Version mismatch: master version %s,"
6222 " node version %s" %
6223 (constants.PROTOCOL_VERSION, result.payload),
6224 errors.ECODE_ENVIRON)
6226 vg_name = cfg.GetVGName()
6227 if vg_name is not None:
6228 vparams = {constants.NV_PVLIST: [vg_name]}
6229 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6230 cname = self.cfg.GetClusterName()
6231 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6232 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6234 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6235 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6237 def Exec(self, feedback_fn):
6238 """Adds the new node to the cluster.
6241 new_node = self.new_node
6242 node = new_node.name
6244 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6247 # We are adding a new node, so we assume it's powered
6248 new_node.powered = True
6250 # for re-adds, reset the offline/drained/master-candidate flags;
6251 # we need to reset here, otherwise offline would prevent RPC calls
6252 # later in the procedure; this also means that if the re-add
6253 # fails, we are left with a non-offlined, broken node
6255 new_node.drained = new_node.offline = False # pylint: disable=W0201
6256 self.LogInfo("Readding a node, the offline/drained flags were reset")
6257 # if we demote the node, we do cleanup later in the procedure
6258 new_node.master_candidate = self.master_candidate
6259 if self.changed_primary_ip:
6260 new_node.primary_ip = self.op.primary_ip
6262 # copy the master/vm_capable flags
6263 for attr in self._NFLAGS:
6264 setattr(new_node, attr, getattr(self.op, attr))
6266 # notify the user about any possible mc promotion
6267 if new_node.master_candidate:
6268 self.LogInfo("Node will be a master candidate")
6270 if self.op.ndparams:
6271 new_node.ndparams = self.op.ndparams
6273 new_node.ndparams = {}
6275 if self.op.hv_state:
6276 new_node.hv_state_static = self.new_hv_state
6278 if self.op.disk_state:
6279 new_node.disk_state_static = self.new_disk_state
6281 # Add node to our /etc/hosts, and add key to known_hosts
6282 if self.cfg.GetClusterInfo().modify_etc_hosts:
6283 master_node = self.cfg.GetMasterNode()
6284 result = self.rpc.call_etc_hosts_modify(master_node,
6285 constants.ETC_HOSTS_ADD,
6288 result.Raise("Can't update hosts file with new host data")
6290 if new_node.secondary_ip != new_node.primary_ip:
6291 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6294 node_verify_list = [self.cfg.GetMasterNode()]
6295 node_verify_param = {
6296 constants.NV_NODELIST: ([node], {}),
6297 # TODO: do a node-net-test as well?
6300 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6301 self.cfg.GetClusterName())
6302 for verifier in node_verify_list:
6303 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6304 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6306 for failed in nl_payload:
6307 feedback_fn("ssh/hostname verification failed"
6308 " (checking from %s): %s" %
6309 (verifier, nl_payload[failed]))
6310 raise errors.OpExecError("ssh/hostname verification failed")
6313 _RedistributeAncillaryFiles(self)
6314 self.context.ReaddNode(new_node)
6315 # make sure we redistribute the config
6316 self.cfg.Update(new_node, feedback_fn)
6317 # and make sure the new node will not have old files around
6318 if not new_node.master_candidate:
6319 result = self.rpc.call_node_demote_from_mc(new_node.name)
6320 msg = result.fail_msg
6322 self.LogWarning("Node failed to demote itself from master"
6323 " candidate status: %s" % msg)
6325 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6326 additional_vm=self.op.vm_capable)
6327 self.context.AddNode(new_node, self.proc.GetECId())
6330 class LUNodeSetParams(LogicalUnit):
6331 """Modifies the parameters of a node.
6333 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6334 to the node role (as _ROLE_*)
6335 @cvar _R2F: a dictionary from node role to tuples of flags
6336 @cvar _FLAGS: a list of attribute names corresponding to the flags
6339 HPATH = "node-modify"
6340 HTYPE = constants.HTYPE_NODE
6342 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6344 (True, False, False): _ROLE_CANDIDATE,
6345 (False, True, False): _ROLE_DRAINED,
6346 (False, False, True): _ROLE_OFFLINE,
6347 (False, False, False): _ROLE_REGULAR,
6349 _R2F = dict((v, k) for k, v in _F2R.items())
6350 _FLAGS = ["master_candidate", "drained", "offline"]
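# Editor's note -- a hedged worked example, not part of the source. The
# _F2R/_R2F tables encode that the three flags are mutually exclusive:
# at most one of (master_candidate, drained, offline) may be True, and
# all-False means a regular node. The round trip looks like:
#
#   _F2R[(True, False, False)]  -> _ROLE_CANDIDATE
#   _R2F[_ROLE_CANDIDATE]       -> (True, False, False)
#   _F2R[(False, False, False)] -> _ROLE_REGULAR
#
# Tuples with more than one True (e.g. drained *and* offline) are
# deliberately absent, so such a state trips the "Un-handled old flags"
# assertion in CheckPrereq.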
6352 def CheckArguments(self):
6353 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6354 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6355 self.op.master_capable, self.op.vm_capable,
6356 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6358 if all_mods.count(None) == len(all_mods):
6359 raise errors.OpPrereqError("Please pass at least one modification",
6361 if all_mods.count(True) > 1:
6362 raise errors.OpPrereqError("Can't set the node into more than one"
6363 " state at the same time",
6366 # Boolean value that tells us whether we might be demoting from MC
6367 self.might_demote = (self.op.master_candidate is False or
6368 self.op.offline is True or
6369 self.op.drained is True or
6370 self.op.master_capable is False)
6372 if self.op.secondary_ip:
6373 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6374 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6375 " address" % self.op.secondary_ip,
6378 self.lock_all = self.op.auto_promote and self.might_demote
6379 self.lock_instances = self.op.secondary_ip is not None
6381 def _InstanceFilter(self, instance):
6382 """Filter for getting affected instances.
6385 return (instance.disk_template in constants.DTS_INT_MIRROR and
6386 self.op.node_name in instance.all_nodes)
6388 def ExpandNames(self):
6390 self.needed_locks = {
6391 locking.LEVEL_NODE: locking.ALL_SET,
6393 # Block allocations when all nodes are locked
6394 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6397 self.needed_locks = {
6398 locking.LEVEL_NODE: self.op.node_name,
6401 # Since modifying a node can have severe effects on currently running
6402 # operations the resource lock is at least acquired in shared mode
6403 self.needed_locks[locking.LEVEL_NODE_RES] = \
6404 self.needed_locks[locking.LEVEL_NODE]
6406 # Get all locks except nodes in shared mode; they are not used for anything
6407 # but read-only access
6408 self.share_locks = _ShareAll()
6409 self.share_locks[locking.LEVEL_NODE] = 0
6410 self.share_locks[locking.LEVEL_NODE_RES] = 0
6411 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6413 if self.lock_instances:
6414 self.needed_locks[locking.LEVEL_INSTANCE] = \
6415 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6417 def BuildHooksEnv(self):
6420 This runs on the master node.
6424 "OP_TARGET": self.op.node_name,
6425 "MASTER_CANDIDATE": str(self.op.master_candidate),
6426 "OFFLINE": str(self.op.offline),
6427 "DRAINED": str(self.op.drained),
6428 "MASTER_CAPABLE": str(self.op.master_capable),
6429 "VM_CAPABLE": str(self.op.vm_capable),
6432 def BuildHooksNodes(self):
6433 """Build hooks nodes.
6436 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6439 def CheckPrereq(self):
6440 """Check prerequisites.
6442 This only checks the instance list against the existing names.
6445 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6447 if self.lock_instances:
6448 affected_instances = \
6449 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6451 # Verify instance locks
6452 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6453 wanted_instances = frozenset(affected_instances.keys())
6454 if wanted_instances - owned_instances:
6455 raise errors.OpPrereqError("Instances affected by changing node %s's"
6456 " secondary IP address have changed since"
6457 " locks were acquired, wanted '%s', have"
6458 " '%s'; retry the operation" %
6460 utils.CommaJoin(wanted_instances),
6461 utils.CommaJoin(owned_instances)),
6464 affected_instances = None
6466 if (self.op.master_candidate is not None or
6467 self.op.drained is not None or
6468 self.op.offline is not None):
6469 # we can't change the master's node flags
6470 if self.op.node_name == self.cfg.GetMasterNode():
6471 raise errors.OpPrereqError("The master role can be changed"
6472 " only via master-failover",
6475 if self.op.master_candidate and not node.master_capable:
6476 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6477 " it a master candidate" % node.name,
6480 if self.op.vm_capable is False:
6481 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6483 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6484 " the vm_capable flag" % node.name,
6487 if node.master_candidate and self.might_demote and not self.lock_all:
6488 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6489 # check if after removing the current node, we're missing master
6491 (mc_remaining, mc_should, _) = \
6492 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6493 if mc_remaining < mc_should:
6494 raise errors.OpPrereqError("Not enough master candidates, please"
6495 " pass auto promote option to allow"
6496 " promotion (--auto-promote or RAPI"
6497 " auto_promote=True)", errors.ECODE_STATE)
6499 self.old_flags = old_flags = (node.master_candidate,
6500 node.drained, node.offline)
6501 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6502 self.old_role = old_role = self._F2R[old_flags]
6504 # Check for ineffective changes
6505 for attr in self._FLAGS:
6506 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6507 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6508 setattr(self.op, attr, None)
6510 # Past this point, any flag change to False means a transition
6511 # away from the respective state, as only real changes are kept
6513 # TODO: We might query the real power state if it supports OOB
6514 if _SupportsOob(self.cfg, node):
6515 if self.op.offline is False and not (node.powered or
6516 self.op.powered is True):
6517 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6518 " offline status can be reset") %
6519 self.op.node_name, errors.ECODE_STATE)
6520 elif self.op.powered is not None:
6521 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6522 " as it does not support out-of-band"
6523 " handling") % self.op.node_name,
6526 # If we're being deofflined/drained, we'll MC ourself if needed
6527 if (self.op.drained is False or self.op.offline is False or
6528 (self.op.master_capable and not node.master_capable)):
6529 if _DecideSelfPromotion(self):
6530 self.op.master_candidate = True
6531 self.LogInfo("Auto-promoting node to master candidate")
6533 # If we're no longer master capable, we'll demote ourselves from MC
6534 if self.op.master_capable is False and node.master_candidate:
6535 self.LogInfo("Demoting from master candidate")
6536 self.op.master_candidate = False
6539 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6540 if self.op.master_candidate:
6541 new_role = self._ROLE_CANDIDATE
6542 elif self.op.drained:
6543 new_role = self._ROLE_DRAINED
6544 elif self.op.offline:
6545 new_role = self._ROLE_OFFLINE
6546 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6547 # False is still in new flags, which means we're un-setting (the
6549 new_role = self._ROLE_REGULAR
6550 else: # no new flags, nothing, keep old role
6553 self.new_role = new_role
6555 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6556 # Trying to transition out of offline status
6557 result = self.rpc.call_version([node.name])[node.name]
6559 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6560 " to report its version: %s" %
6561 (node.name, result.fail_msg),
6564 self.LogWarning("Transitioning node from offline to online state"
6565 " without using re-add. Please make sure the node"
6568 # When changing the secondary ip, verify if this is a single-homed to
6569 # multi-homed transition or vice versa, and apply the relevant
6571 if self.op.secondary_ip:
6572 # Ok even without locking, because this can't be changed by any LU
6573 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6574 master_singlehomed = master.secondary_ip == master.primary_ip
6575 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6576 if self.op.force and node.name == master.name:
6577 self.LogWarning("Transitioning from single-homed to multi-homed"
6578 " cluster; all nodes will require a secondary IP"
6581 raise errors.OpPrereqError("Changing the secondary ip on a"
6582 " single-homed cluster requires the"
6583 " --force option to be passed, and the"
6584 " target node to be the master",
6586 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6587 if self.op.force and node.name == master.name:
6588 self.LogWarning("Transitioning from multi-homed to single-homed"
6589 " cluster; secondary IP addresses will have to be"
6592 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6593 " same as the primary IP on a multi-homed"
6594 " cluster, unless the --force option is"
6595 " passed, and the target node is the"
6596 " master", errors.ECODE_INVAL)
6598 assert not (frozenset(affected_instances) -
6599 self.owned_locks(locking.LEVEL_INSTANCE))
6602 if affected_instances:
6603 msg = ("Cannot change secondary IP address: offline node has"
6604 " instances (%s) configured to use it" %
6605 utils.CommaJoin(affected_instances.keys()))
6606 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6608 # On online nodes, check that no instances are running, and that
6609 # the node has the new ip and we can reach it.
6610 for instance in affected_instances.values():
6611 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6612 msg="cannot change secondary ip")
6614 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6615 if master.name != node.name:
6616 # check reachability from master secondary ip to new secondary ip
6617 if not netutils.TcpPing(self.op.secondary_ip,
6618 constants.DEFAULT_NODED_PORT,
6619 source=master.secondary_ip):
6620 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6621 " based ping to node daemon port",
6622 errors.ECODE_ENVIRON)
6624 if self.op.ndparams:
6625 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6626 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6627 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6628 "node", "cluster or group")
6629 self.new_ndparams = new_ndparams
6631 if self.op.hv_state:
6632 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6633 self.node.hv_state_static)
6635 if self.op.disk_state:
6636 self.new_disk_state = \
6637 _MergeAndVerifyDiskState(self.op.disk_state,
6638 self.node.disk_state_static)
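# Editor's note -- a hedged sketch, not part of the original module. The
# role computation above reduces all flag changes to a single old_role ->
# new_role transition; the special cases (demotion RPC, candidate pool
# adjustment, de-offlining checks) then key off that pair. Schematically:
def _DemoRoleTransition(master_candidate, drained, offline, old_role):
  """Each flag is True, False, or None (meaning "not requested")."""
  if master_candidate:
    return "candidate"
  elif drained:
    return "drained"
  elif offline:
    return "offline"
  elif False in (master_candidate, drained, offline):
    return "regular"  # a flag was explicitly unset
  else:
    return old_role   # nothing requested: keep the old role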
6640 def Exec(self, feedback_fn):
6645 old_role = self.old_role
6646 new_role = self.new_role
6650 if self.op.ndparams:
6651 node.ndparams = self.new_ndparams
6653 if self.op.powered is not None:
6654 node.powered = self.op.powered
6656 if self.op.hv_state:
6657 node.hv_state_static = self.new_hv_state
6659 if self.op.disk_state:
6660 node.disk_state_static = self.new_disk_state
6662 for attr in ["master_capable", "vm_capable"]:
6663 val = getattr(self.op, attr)
6665 setattr(node, attr, val)
6666 result.append((attr, str(val)))
6668 if new_role != old_role:
6669 # Tell the node to demote itself, if no longer MC and not offline
6670 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6671 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6673 self.LogWarning("Node failed to demote itself: %s", msg)
6675 new_flags = self._R2F[new_role]
6676 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6678 result.append((desc, str(nf)))
6679 (node.master_candidate, node.drained, node.offline) = new_flags
6681 # we locked all nodes, so we adjust the candidate pool before updating this node
6683 _AdjustCandidatePool(self, [node.name])
6685 if self.op.secondary_ip:
6686 node.secondary_ip = self.op.secondary_ip
6687 result.append(("secondary_ip", self.op.secondary_ip))
6689 # this will trigger configuration file update, if needed
6690 self.cfg.Update(node, feedback_fn)
6692 # this will trigger job queue propagation or cleanup if the mc
6694 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6695 self.context.ReaddNode(node)
6700 class LUNodePowercycle(NoHooksLU):
6701 """Powercycles a node.
6706 def CheckArguments(self):
6707 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6708 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6709 raise errors.OpPrereqError("The node is the master and the force"
6710 " parameter was not set",
6713 def ExpandNames(self):
6714 """Locking for PowercycleNode.
6716 This is a last-resort option and shouldn't block on other
6717 jobs. Therefore, we grab no locks.
6720 self.needed_locks = {}
6722 def Exec(self, feedback_fn):
6726 result = self.rpc.call_node_powercycle(self.op.node_name,
6727 self.cfg.GetHypervisorType())
6728 result.Raise("Failed to schedule the reboot")
6729 return result.payload
6732 class LUClusterQuery(NoHooksLU):
6733 """Query cluster configuration.
6738 def ExpandNames(self):
6739 self.needed_locks = {}
6741 def Exec(self, feedback_fn):
6742 """Return cluster config.
6745 cluster = self.cfg.GetClusterInfo()
6748 # Filter just for enabled hypervisors
6749 for os_name, hv_dict in cluster.os_hvp.items():
6750 os_hvp[os_name] = {}
6751 for hv_name, hv_params in hv_dict.items():
6752 if hv_name in cluster.enabled_hypervisors:
6753 os_hvp[os_name][hv_name] = hv_params
6755 # Convert ip_family to ip_version
6756 primary_ip_version = constants.IP4_VERSION
6757 if cluster.primary_ip_family == netutils.IP6Address.family:
6758 primary_ip_version = constants.IP6_VERSION
6761 "software_version": constants.RELEASE_VERSION,
6762 "protocol_version": constants.PROTOCOL_VERSION,
6763 "config_version": constants.CONFIG_VERSION,
6764 "os_api_version": max(constants.OS_API_VERSIONS),
6765 "export_version": constants.EXPORT_VERSION,
6766 "architecture": runtime.GetArchInfo(),
6767 "name": cluster.cluster_name,
6768 "master": cluster.master_node,
6769 "default_hypervisor": cluster.primary_hypervisor,
6770 "enabled_hypervisors": cluster.enabled_hypervisors,
6771 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6772 for hypervisor_name in cluster.enabled_hypervisors]),
6774 "beparams": cluster.beparams,
6775 "osparams": cluster.osparams,
6776 "ipolicy": cluster.ipolicy,
6777 "nicparams": cluster.nicparams,
6778 "ndparams": cluster.ndparams,
6779 "diskparams": cluster.diskparams,
6780 "candidate_pool_size": cluster.candidate_pool_size,
6781 "master_netdev": cluster.master_netdev,
6782 "master_netmask": cluster.master_netmask,
6783 "use_external_mip_script": cluster.use_external_mip_script,
6784 "volume_group_name": cluster.volume_group_name,
6785 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6786 "file_storage_dir": cluster.file_storage_dir,
6787 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6788 "maintain_node_health": cluster.maintain_node_health,
6789 "ctime": cluster.ctime,
6790 "mtime": cluster.mtime,
6791 "uuid": cluster.uuid,
6792 "tags": list(cluster.GetTags()),
6793 "uid_pool": cluster.uid_pool,
6794 "default_iallocator": cluster.default_iallocator,
6795 "reserved_lvs": cluster.reserved_lvs,
6796 "primary_ip_version": primary_ip_version,
6797 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6798 "hidden_os": cluster.hidden_os,
6799 "blacklisted_os": cluster.blacklisted_os,
6805 class LUClusterConfigQuery(NoHooksLU):
6806 """Return configuration values.
6811 def CheckArguments(self):
6812 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6814 def ExpandNames(self):
6815 self.cq.ExpandNames(self)
6817 def DeclareLocks(self, level):
6818 self.cq.DeclareLocks(self, level)
6820 def Exec(self, feedback_fn):
6821 result = self.cq.OldStyleQuery(self)
6823 assert len(result) == 1
6828 class _ClusterQuery(_QueryBase):
6829 FIELDS = query.CLUSTER_FIELDS
6831 #: Do not sort (there is only one item)
6834 def ExpandNames(self, lu):
6835 lu.needed_locks = {}
6837 # The following variables interact with _QueryBase._GetNames
6838 self.wanted = locking.ALL_SET
6839 self.do_locking = self.use_locking
6842 raise errors.OpPrereqError("Cannot use locking for cluster queries",
6845 def DeclareLocks(self, lu, level):
6848 def _GetQueryData(self, lu):
6849 """Computes the list of nodes and their attributes.
6852 # Locking is not used
6853 assert not (compat.any(lu.glm.is_owned(level)
6854 for level in locking.LEVELS
6855 if level != locking.LEVEL_CLUSTER) or
6856 self.do_locking or self.use_locking)
6858 if query.CQ_CONFIG in self.requested_data:
6859 cluster = lu.cfg.GetClusterInfo()
6861 cluster = NotImplemented
6863 if query.CQ_QUEUE_DRAINED in self.requested_data:
6864 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6866 drain_flag = NotImplemented
6868 if query.CQ_WATCHER_PAUSE in self.requested_data:
6869 master_name = lu.cfg.GetMasterNode()
6871 result = lu.rpc.call_get_watcher_pause(master_name)
6872 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6875 watcher_pause = result.payload
6877 watcher_pause = NotImplemented
6879 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
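# Editor's note -- a hedged sketch, not in the original source. Data the
# caller did not request is filled with NotImplemented rather than None,
# presumably so that accidental use of an ungathered field stands out
# instead of looking like a legitimate empty value:
def _DemoPlaceholderPattern(want_config):
  cluster = NotImplemented
  if want_config:
    cluster = {"name": "demo-cluster"}  # stands in for GetClusterInfo()
  return cluster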
6882 class LUInstanceActivateDisks(NoHooksLU):
6883 """Bring up an instance's disks.
6888 def ExpandNames(self):
6889 self._ExpandAndLockInstance()
6890 self.needed_locks[locking.LEVEL_NODE] = []
6891 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6893 def DeclareLocks(self, level):
6894 if level == locking.LEVEL_NODE:
6895 self._LockInstancesNodes()
6897 def CheckPrereq(self):
6898 """Check prerequisites.
6900 This checks that the instance is in the cluster.
6903 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6904 assert self.instance is not None, \
6905 "Cannot retrieve locked instance %s" % self.op.instance_name
6906 _CheckNodeOnline(self, self.instance.primary_node)
6908 def Exec(self, feedback_fn):
6909 """Activate the disks.
6912 disks_ok, disks_info = \
6913 _AssembleInstanceDisks(self, self.instance,
6914 ignore_size=self.op.ignore_size)
6916 raise errors.OpExecError("Cannot activate block devices")
6918 if self.op.wait_for_sync:
6919 if not _WaitForSync(self, self.instance):
6920 raise errors.OpExecError("Some disks of the instance are degraded!")
6925 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6927 """Prepare the block devices for an instance.
6929 This sets up the block devices on all nodes.
6931 @type lu: L{LogicalUnit}
6932 @param lu: the logical unit on whose behalf we execute
6933 @type instance: L{objects.Instance}
6934 @param instance: the instance for whose disks we assemble
6935 @type disks: list of L{objects.Disk} or None
6936 @param disks: which disks to assemble (or all, if None)
6937 @type ignore_secondaries: boolean
6938 @param ignore_secondaries: if true, errors on secondary nodes
6939 won't result in an error return from the function
6940 @type ignore_size: boolean
6941 @param ignore_size: if true, the current known size of the disk
6942 will not be used during the disk activation, useful for cases
6943 when the size is wrong
6944 @return: False if the operation failed, otherwise a list of
6945 (host, instance_visible_name, node_visible_name)
6946 with the mapping from node devices to instance devices
6951 iname = instance.name
6952 disks = _ExpandCheckDisks(instance, disks)
6954 # With the two-pass mechanism we try to reduce the window of
6955 # opportunity for the race condition of switching DRBD to primary
6956 # before handshaking occurred, but we do not eliminate it
6958 # The proper fix would be to wait (with some limits) until the
6959 # connection has been made and drbd transitions from WFConnection
6960 # into any other network-connected state (Connected, SyncTarget,
6963 # 1st pass, assemble on all nodes in secondary mode
6964 for idx, inst_disk in enumerate(disks):
6965 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6967 node_disk = node_disk.Copy()
6968 node_disk.UnsetSize()
6969 lu.cfg.SetDiskID(node_disk, node)
6970 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6972 msg = result.fail_msg
6974 is_offline_secondary = (node in instance.secondary_nodes and
6976 lu.LogWarning("Could not prepare block device %s on node %s"
6977 " (is_primary=False, pass=1): %s",
6978 inst_disk.iv_name, node, msg)
6979 if not (ignore_secondaries or is_offline_secondary):
6982 # FIXME: race condition on drbd migration to primary
6984 # 2nd pass, do only the primary node
6985 for idx, inst_disk in enumerate(disks):
6988 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6989 if node != instance.primary_node:
6992 node_disk = node_disk.Copy()
6993 node_disk.UnsetSize()
6994 lu.cfg.SetDiskID(node_disk, node)
6995 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6997 msg = result.fail_msg
6999 lu.LogWarning("Could not prepare block device %s on node %s"
7000 " (is_primary=True, pass=2): %s",
7001 inst_disk.iv_name, node, msg)
7004 dev_path = result.payload
7006 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
7008 # leave the disks configured for the primary node
7009 # this is a workaround that would be better fixed by
7010 # improving the logical/physical id handling
7012 lu.cfg.SetDiskID(disk, instance.primary_node)
7014 return disks_ok, device_info
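# Editor's illustration -- a hedged sketch, not part of the module. Typical
# use of the function above: callers check the boolean first and roll the
# disks back down on failure (see _StartInstanceDisks below), e.g., inside
# an LU with feedback_fn available:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for (node, iv_name, dev_path) in device_info:
#     feedback_fn("%s: disk %s is at %s" % (node, iv_name, dev_path))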
7017 def _StartInstanceDisks(lu, instance, force):
7018 """Start the disks of an instance.
7021 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
7022 ignore_secondaries=force)
7024 _ShutdownInstanceDisks(lu, instance)
7025 if force is not None and not force:
7027 hint=("If the message above refers to a secondary node,"
7028 " you can retry the operation using '--force'"))
7029 raise errors.OpExecError("Disk consistency error")
7032 class LUInstanceDeactivateDisks(NoHooksLU):
7033 """Shutdown an instance's disks.
7038 def ExpandNames(self):
7039 self._ExpandAndLockInstance()
7040 self.needed_locks[locking.LEVEL_NODE] = []
7041 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7043 def DeclareLocks(self, level):
7044 if level == locking.LEVEL_NODE:
7045 self._LockInstancesNodes()
7047 def CheckPrereq(self):
7048 """Check prerequisites.
7050 This checks that the instance is in the cluster.
7053 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7054 assert self.instance is not None, \
7055 "Cannot retrieve locked instance %s" % self.op.instance_name
7057 def Exec(self, feedback_fn):
7058 """Deactivate the disks
7061 instance = self.instance
7063 _ShutdownInstanceDisks(self, instance)
7065 _SafeShutdownInstanceDisks(self, instance)
7068 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7069 """Shutdown block devices of an instance.
7071 This function checks whether an instance is running before calling
7072 _ShutdownInstanceDisks.
7075 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7076 _ShutdownInstanceDisks(lu, instance, disks=disks)
7079 def _ExpandCheckDisks(instance, disks):
7080 """Return the instance disks selected by the disks list
7082 @type disks: list of L{objects.Disk} or None
7083 @param disks: selected disks
7084 @rtype: list of L{objects.Disk}
7085 @return: selected instance disks to act on
7089 return instance.disks
7091 if not set(disks).issubset(instance.disks):
7092 raise errors.ProgrammerError("Can only act on disks belonging to the"
7097 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7098 """Shutdown block devices of an instance.
7100 This does the shutdown on all nodes of the instance.
7102 If ignore_primary is false, errors on the primary node are
7107 disks = _ExpandCheckDisks(instance, disks)
7110 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7111 lu.cfg.SetDiskID(top_disk, node)
7112 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7113 msg = result.fail_msg
7115 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7116 disk.iv_name, node, msg)
7117 if ((node == instance.primary_node and not ignore_primary) or
7118 (node != instance.primary_node and not result.offline)):
7123 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7124 """Checks if a node has enough free memory.
7126 This function checks if a given node has the needed amount of free
7127 memory. In case the node has less memory or we cannot get the
7128 information from the node, this function raises an OpPrereqError
7131 @type lu: C{LogicalUnit}
7132 @param lu: a logical unit from which we get configuration data
7134 @param node: the node to check
7135 @type reason: C{str}
7136 @param reason: string to use in the error message
7137 @type requested: C{int}
7138 @param requested: the amount of memory in MiB to check for
7139 @type hypervisor_name: C{str}
7140 @param hypervisor_name: the hypervisor to ask for memory stats
7142 @return: node current free memory
7143 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7144 we cannot check the node
7147 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7148 nodeinfo[node].Raise("Can't get data from node %s" % node,
7149 prereq=True, ecode=errors.ECODE_ENVIRON)
7150 (_, _, (hv_info, )) = nodeinfo[node].payload
7152 free_mem = hv_info.get("memory_free", None)
7153 if not isinstance(free_mem, int):
7154 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7155 " was '%s'" % (node, free_mem),
7156 errors.ECODE_ENVIRON)
7157 if requested > free_mem:
7158 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7159 " needed %s MiB, available %s MiB" %
7160 (node, reason, requested, free_mem),
7165 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7166 """Checks if nodes have enough free disk space in all the VGs.
7168 This function checks if all given nodes have the needed amount of
7169 free disk. In case any node has less disk space or we cannot get the
7170 information from the node, this function raises an OpPrereqError
7173 @type lu: C{LogicalUnit}
7174 @param lu: a logical unit from which we get configuration data
7175 @type nodenames: C{list}
7176 @param nodenames: the list of node names to check
7177 @type req_sizes: C{dict}
7178 @param req_sizes: the hash of vg and corresponding amount of disk in
7180 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7181 or we cannot check the node
7184 for vg, req_size in req_sizes.items():
7185 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7188 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7189 """Checks if nodes have enough free disk space in the specified VG.
7191 This function checks if all given nodes have the needed amount of
7192 free disk. In case any node has less disk space or we cannot get the
7193 information from the node, this function raises an OpPrereqError
7196 @type lu: C{LogicalUnit}
7197 @param lu: a logical unit from which we get configuration data
7198 @type nodenames: C{list}
7199 @param nodenames: the list of node names to check
7201 @param vg: the volume group to check
7202 @type requested: C{int}
7203 @param requested: the amount of disk in MiB to check for
7204 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7205 or we cannot check the node
7208 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7209 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7210 for node in nodenames:
7211 info = nodeinfo[node]
7212 info.Raise("Cannot get current information from node %s" % node,
7213 prereq=True, ecode=errors.ECODE_ENVIRON)
7214 (_, (vg_info, ), _) = info.payload
7215 vg_free = vg_info.get("vg_free", None)
7216 if not isinstance(vg_free, int):
7217 raise errors.OpPrereqError("Can't compute free disk space on node"
7218 " %s for vg %s, result was '%s'" %
7219 (node, vg, vg_free), errors.ECODE_ENVIRON)
7220 if requested > vg_free:
7221 raise errors.OpPrereqError("Not enough disk space on target node %s"
7222 " vg %s: required %d MiB, available %d MiB" %
7223 (node, vg, requested, vg_free),
7227 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7228 """Checks if nodes have enough physical CPUs
7230 This function checks if all given nodes have the needed number of
7231 physical CPUs. In case any node has fewer CPUs or we cannot get the
7232 information from the node, this function raises an OpPrereqError
7235 @type lu: C{LogicalUnit}
7236 @param lu: a logical unit from which we get configuration data
7237 @type nodenames: C{list}
7238 @param nodenames: the list of node names to check
7239 @type requested: C{int}
7240 @param requested: the minimum acceptable number of physical CPUs
7241 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7242 or we cannot check the node
7245 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7246 for node in nodenames:
7247 info = nodeinfo[node]
7248 info.Raise("Cannot get current information from node %s" % node,
7249 prereq=True, ecode=errors.ECODE_ENVIRON)
7250 (_, _, (hv_info, )) = info.payload
7251 num_cpus = hv_info.get("cpu_total", None)
7252 if not isinstance(num_cpus, int):
7253 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7254 " on node %s, result was '%s'" %
7255 (node, num_cpus), errors.ECODE_ENVIRON)
7256 if requested > num_cpus:
7257 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7258 "required" % (node, num_cpus, requested),
7262 class LUInstanceStartup(LogicalUnit):
7263 """Starts an instance.
7266 HPATH = "instance-start"
7267 HTYPE = constants.HTYPE_INSTANCE
7270 def CheckArguments(self):
7272 if self.op.beparams:
7273 # fill the beparams dict
7274 objects.UpgradeBeParams(self.op.beparams)
7275 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7277 def ExpandNames(self):
7278 self._ExpandAndLockInstance()
7279 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7281 def DeclareLocks(self, level):
7282 if level == locking.LEVEL_NODE_RES:
7283 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7285 def BuildHooksEnv(self):
7288 This runs on master, primary and secondary nodes of the instance.
7292 "FORCE": self.op.force,
7295 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7299 def BuildHooksNodes(self):
7300 """Build hooks nodes.
7303 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7306 def CheckPrereq(self):
7307 """Check prerequisites.
7309 This checks that the instance is in the cluster.
7312 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7313 assert self.instance is not None, \
7314 "Cannot retrieve locked instance %s" % self.op.instance_name
7317 if self.op.hvparams:
7318 # check hypervisor parameter syntax (locally)
7319 cluster = self.cfg.GetClusterInfo()
7320 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7321 filled_hvp = cluster.FillHV(instance)
7322 filled_hvp.update(self.op.hvparams)
7323 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7324 hv_type.CheckParameterSyntax(filled_hvp)
7325 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7327 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7329 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7331 if self.primary_offline and self.op.ignore_offline_nodes:
7332 self.LogWarning("Ignoring offline primary node")
7334 if self.op.hvparams or self.op.beparams:
7335 self.LogWarning("Overridden parameters are ignored")
7337 _CheckNodeOnline(self, instance.primary_node)
7339 bep = self.cfg.GetClusterInfo().FillBE(instance)
7340 bep.update(self.op.beparams)
7342 # check bridges existence
7343 _CheckInstanceBridgesExist(self, instance)
7345 remote_info = self.rpc.call_instance_info(instance.primary_node,
7347 instance.hypervisor)
7348 remote_info.Raise("Error checking node %s" % instance.primary_node,
7349 prereq=True, ecode=errors.ECODE_ENVIRON)
7350 if not remote_info.payload: # not running already
7351 _CheckNodeFreeMemory(self, instance.primary_node,
7352 "starting instance %s" % instance.name,
7353 bep[constants.BE_MINMEM], instance.hypervisor)
7355 def Exec(self, feedback_fn):
7356 """Start the instance.
7359 instance = self.instance
7360 force = self.op.force
7362 if not self.op.no_remember:
7363 self.cfg.MarkInstanceUp(instance.name)
7365 if self.primary_offline:
7366 assert self.op.ignore_offline_nodes
7367 self.LogInfo("Primary node offline, marked instance as started")
7369 node_current = instance.primary_node
7371 _StartInstanceDisks(self, instance, force)
7374 self.rpc.call_instance_start(node_current,
7375 (instance, self.op.hvparams,
7377 self.op.startup_paused)
7378 msg = result.fail_msg
7380 _ShutdownInstanceDisks(self, instance)
7381 raise errors.OpExecError("Could not start instance: %s" % msg)
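# Editor's note -- a hedged sketch, not in the original source. The startup
# path above follows an activate-then-rollback pattern: disks are brought
# up first, and if the actual instance start fails the disks are shut down
# again so no half-started state is left behind. Schematically:
def _DemoStartWithRollback(start_disks, start_instance, stop_disks):
  """All three arguments are callables; start_instance returns an error
  message string on failure and a false value on success."""
  start_disks()
  msg = start_instance()
  if msg:
    stop_disks()
    raise RuntimeError("Could not start instance: %s" % msg)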
7384 class LUInstanceReboot(LogicalUnit):
7385 """Reboot an instance.
7388 HPATH = "instance-reboot"
7389 HTYPE = constants.HTYPE_INSTANCE
7392 def ExpandNames(self):
7393 self._ExpandAndLockInstance()
7395 def BuildHooksEnv(self):
7398 This runs on master, primary and secondary nodes of the instance.
7402 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7403 "REBOOT_TYPE": self.op.reboot_type,
7404 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7407 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7411 def BuildHooksNodes(self):
7412 """Build hooks nodes.
7415 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7418 def CheckPrereq(self):
7419 """Check prerequisites.
7421 This checks that the instance is in the cluster.
7424 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7425 assert self.instance is not None, \
7426 "Cannot retrieve locked instance %s" % self.op.instance_name
7427 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7428 _CheckNodeOnline(self, instance.primary_node)
7430 # check bridges existence
7431 _CheckInstanceBridgesExist(self, instance)
7433 def Exec(self, feedback_fn):
7434 """Reboot the instance.
7437 instance = self.instance
7438 ignore_secondaries = self.op.ignore_secondaries
7439 reboot_type = self.op.reboot_type
7440 reason = self.op.reason
7442 remote_info = self.rpc.call_instance_info(instance.primary_node,
7444 instance.hypervisor)
7445 remote_info.Raise("Error checking node %s" % instance.primary_node)
7446 instance_running = bool(remote_info.payload)
7448 node_current = instance.primary_node
7450 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7451 constants.INSTANCE_REBOOT_HARD]:
7452 for disk in instance.disks:
7453 self.cfg.SetDiskID(disk, node_current)
7454 result = self.rpc.call_instance_reboot(node_current, instance,
7455 reboot_type,
7456 self.op.shutdown_timeout,
7457 reason)
7458 result.Raise("Could not reboot instance")
7459 else:
7460 if instance_running:
7461 result = self.rpc.call_instance_shutdown(node_current, instance,
7462 self.op.shutdown_timeout)
7463 result.Raise("Could not shutdown instance for full reboot")
7464 _ShutdownInstanceDisks(self, instance)
7465 else:
7466 self.LogInfo("Instance %s was already stopped, starting now",
7467 instance.name)
7468 _StartInstanceDisks(self, instance, ignore_secondaries)
7469 result = self.rpc.call_instance_start(node_current,
7470 (instance, None, None), False)
7471 msg = result.fail_msg
7472 if msg:
7473 _ShutdownInstanceDisks(self, instance)
7474 raise errors.OpExecError("Could not start instance for"
7475 " full reboot: %s" % msg)
7477 self.cfg.MarkInstanceUp(instance.name)
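# Summary of the reboot paths implemented above (all end with the instance
# marked as up in the configuration):
#
#   running + SOFT/HARD  -> single call_instance_reboot on the primary node
#   running + FULL       -> shutdown, deactivate disks, reactivate, start
#   not running (any)    -> just activate disks and start the instance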
7480 class LUInstanceShutdown(LogicalUnit):
7481 """Shutdown an instance.
7484 HPATH = "instance-stop"
7485 HTYPE = constants.HTYPE_INSTANCE
7486 REQ_BGL = False
7488 def ExpandNames(self):
7489 self._ExpandAndLockInstance()
7491 def BuildHooksEnv(self):
7492 """Build hooks env.
7494 This runs on master, primary and secondary nodes of the instance.
7496 """
7497 env = _BuildInstanceHookEnvByObject(self, self.instance)
7498 env["TIMEOUT"] = self.op.timeout
7500 return env
7501 def BuildHooksNodes(self):
7502 """Build hooks nodes.
7505 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7508 def CheckPrereq(self):
7509 """Check prerequisites.
7511 This checks that the instance is in the cluster.
7514 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7515 assert self.instance is not None, \
7516 "Cannot retrieve locked instance %s" % self.op.instance_name
7518 if not self.op.force:
7519 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7520 else:
7521 self.LogWarning("Ignoring offline instance check")
7523 self.primary_offline = \
7524 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7526 if self.primary_offline and self.op.ignore_offline_nodes:
7527 self.LogWarning("Ignoring offline primary node")
7528 else:
7529 _CheckNodeOnline(self, self.instance.primary_node)
7531 def Exec(self, feedback_fn):
7532 """Shutdown the instance.
7535 instance = self.instance
7536 node_current = instance.primary_node
7537 timeout = self.op.timeout
7539 # If the instance is offline we shouldn't mark it as down, as that
7540 # resets the offline flag.
7541 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7542 self.cfg.MarkInstanceDown(instance.name)
7544 if self.primary_offline:
7545 assert self.op.ignore_offline_nodes
7546 self.LogInfo("Primary node offline, marked instance as stopped")
7547 else:
7548 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7549 msg = result.fail_msg
7550 if msg:
7551 self.LogWarning("Could not shutdown instance: %s", msg)
7553 _ShutdownInstanceDisks(self, instance)
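# Note the ordering above: the instance is marked down in the configuration
# before the shutdown RPC, so the cluster's intent is recorded even if the
# hypervisor call fails, while ADMINST_OFFLINE instances are left untouched
# because MarkInstanceDown would reset the offline flag. Sketch of the
# guard with a hypothetical state:
#
#   admin_state = constants.ADMINST_OFFLINE
#   assert admin_state not in INSTANCE_ONLINE   # hence no MarkInstanceDown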
7556 class LUInstanceReinstall(LogicalUnit):
7557 """Reinstall an instance.
7560 HPATH = "instance-reinstall"
7561 HTYPE = constants.HTYPE_INSTANCE
7562 REQ_BGL = False
7564 def ExpandNames(self):
7565 self._ExpandAndLockInstance()
7567 def BuildHooksEnv(self):
7568 """Build hooks env.
7570 This runs on master, primary and secondary nodes of the instance.
7572 """
7573 return _BuildInstanceHookEnvByObject(self, self.instance)
7575 def BuildHooksNodes(self):
7576 """Build hooks nodes.
7579 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7582 def CheckPrereq(self):
7583 """Check prerequisites.
7585 This checks that the instance is in the cluster and is not running.
7588 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7589 assert instance is not None, \
7590 "Cannot retrieve locked instance %s" % self.op.instance_name
7591 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7592 " offline, cannot reinstall")
7594 if instance.disk_template == constants.DT_DISKLESS:
7595 raise errors.OpPrereqError("Instance '%s' has no disks" %
7596 self.op.instance_name,
7597 errors.ECODE_INVAL)
7598 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7600 if self.op.os_type is not None:
7602 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7603 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7604 instance_os = self.op.os_type
7605 else:
7606 instance_os = instance.os
7608 nodelist = list(instance.all_nodes)
7610 if self.op.osparams:
7611 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7612 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7613 self.os_inst = i_osdict # the new dict (without defaults)
7614 else:
7615 self.os_inst = {}
7617 self.instance = instance
7619 def Exec(self, feedback_fn):
7620 """Reinstall the instance.
7623 inst = self.instance
7625 if self.op.os_type is not None:
7626 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7627 inst.os = self.op.os_type
7628 # Write to configuration
7629 self.cfg.Update(inst, feedback_fn)
7631 _StartInstanceDisks(self, inst, None)
7632 try:
7633 feedback_fn("Running the instance OS create scripts...")
7634 # FIXME: pass debug option from opcode to backend
7635 result = self.rpc.call_instance_os_add(inst.primary_node,
7636 (inst, self.os_inst), True,
7637 self.op.debug_level)
7638 result.Raise("Could not install OS for instance %s on node %s" %
7639 (inst.name, inst.primary_node))
7640 finally:
7641 _ShutdownInstanceDisks(self, inst)
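# _GetUpdatedParams above merges the stored per-instance OS parameters with
# the opcode's overrides. Roughly, with hypothetical values (the real
# helper also supports removing keys via special "default" values):
#
#   osparams = {"dhcp": "yes", "mirror": "http://a.example.com"}
#   overrides = {"mirror": "http://b.example.com"}
#   merged = dict(osparams, **overrides)
#   assert merged["mirror"] == "http://b.example.com"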
7644 class LUInstanceRecreateDisks(LogicalUnit):
7645 """Recreate an instance's missing disks.
7648 HPATH = "instance-recreate-disks"
7649 HTYPE = constants.HTYPE_INSTANCE
7650 REQ_BGL = False
7652 _MODIFYABLE = compat.UniqueFrozenset([
7653 constants.IDISK_SIZE,
7654 constants.IDISK_MODE,
7655 ])
7657 # New or changed disk parameters may have different semantics
7658 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7659 constants.IDISK_ADOPT,
7661 # TODO: Implement support changing VG while recreating
7662 constants.IDISK_VG,
7663 constants.IDISK_METAVG,
7664 constants.IDISK_PROVIDER,
7665 ]))
7667 def _RunAllocator(self):
7668 """Run the allocator based on input opcode.
7671 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7674 # The allocator should actually run in "relocate" mode, but current
7675 # allocators don't support relocating all the nodes of an instance at
7676 # the same time. As a workaround we use "allocate" mode, but this is
7677 # suboptimal for two reasons:
7678 # - The instance name passed to the allocator is present in the list of
7679 # existing instances, so there could be a conflict within the
7680 # internal structures of the allocator. This doesn't happen with the
7681 # current allocators, but it's a liability.
7682 # - The allocator counts the resources used by the instance twice: once
7683 # because the instance exists already, and once because it tries to
7684 # allocate a new instance.
7685 # The allocator could choose some of the nodes on which the instance is
7686 # running, but that's not a problem. If the instance nodes are broken,
7687 # they should already be marked as drained or offline, and hence
7688 # skipped by the allocator. If instance disks have been lost for other
7689 # reasons, then recreating the disks on the same nodes should be fine.
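# To make the double-counting concrete with illustrative numbers: if the
# instance reserves 4 GB on its current node, an "allocate" request for its
# replacement sees both the existing reservation and the new 4 GB request,
# so a node needs roughly 8 GB free to be chosen even though only 4 GB will
# actually be used once the disks are recreated.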
7690 disk_template = self.instance.disk_template
7691 spindle_use = be_full[constants.BE_SPINDLE_USE]
7692 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7693 disk_template=disk_template,
7694 tags=list(self.instance.GetTags()),
7695 os=self.instance.os,
7696 nics=[{}],
7697 vcpus=be_full[constants.BE_VCPUS],
7698 memory=be_full[constants.BE_MAXMEM],
7699 spindle_use=spindle_use,
7700 disks=[{constants.IDISK_SIZE: d.size,
7701 constants.IDISK_MODE: d.mode}
7702 for d in self.instance.disks],
7703 hypervisor=self.instance.hypervisor,
7704 node_whitelist=None)
7705 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7707 ial.Run(self.op.iallocator)
7709 assert req.RequiredNodes() == len(self.instance.all_nodes)
7711 if not ial.success:
7712 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7713 " %s" % (self.op.iallocator, ial.info),
7714 errors.ECODE_NORES)
7716 self.op.nodes = ial.result
7717 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7718 self.op.instance_name, self.op.iallocator,
7719 utils.CommaJoin(ial.result))
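# Sketch of the iallocator round-trip performed above; "hail" is merely the
# usual external allocator script, any script implementing the iallocator
# protocol may be named in the opcode:
#
#   req = iallocator.IAReqInstanceAlloc(name=..., disks=..., ...)
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)
#   ial.Run("hail")
#   if ial.success:
#     nodes = ial.result   # list of node names, primary node first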
7721 def CheckArguments(self):
7722 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7723 # Normalize and convert deprecated list of disk indices
7724 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7726 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7727 if duplicates:
7728 raise errors.OpPrereqError("Some disks have been specified more than"
7729 " once: %s" % utils.CommaJoin(duplicates),
7730 errors.ECODE_INVAL)
7732 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7733 # when neither iallocator nor nodes are specified
7734 if self.op.iallocator or self.op.nodes:
7735 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7737 for (idx, params) in self.op.disks:
7738 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7739 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7740 if unsupported:
7741 raise errors.OpPrereqError("Parameters for disk %s try to change"
7742 " unmodifyable parameter(s): %s" %
7743 (idx, utils.CommaJoin(unsupported)),
7744 errors.ECODE_INVAL)
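# The normalization above turns the deprecated plain-index form into the
# (index, params) pairs used internally, e.g.:
#
#   op.disks = [2, 0]                  ->  [(0, {}), (2, {})]
#   op.disks = [(1, {"size": 1024})]   ->  unchanged (already normalized)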
7746 def ExpandNames(self):
7747 self._ExpandAndLockInstance()
7748 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7750 if self.op.nodes:
7751 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7752 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7753 else:
7754 self.needed_locks[locking.LEVEL_NODE] = []
7755 if self.op.iallocator:
7756 # iallocator will select a new node in the same group
7757 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7758 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7760 self.needed_locks[locking.LEVEL_NODE_RES] = []
7762 def DeclareLocks(self, level):
7763 if level == locking.LEVEL_NODEGROUP:
7764 assert self.op.iallocator is not None
7765 assert not self.op.nodes
7766 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7767 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7768 # Lock the primary group used by the instance optimistically; this
7769 # requires going via the node before it's locked, requiring
7770 # verification later on
7771 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7772 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7774 elif level == locking.LEVEL_NODE:
7775 # If an allocator is used, then we lock all the nodes in the current
7776 # instance group, as we don't know yet which ones will be selected;
7777 # if we replace the nodes without using an allocator, locks are
7778 # already declared in ExpandNames; otherwise, we need to lock all the
7779 # instance nodes for disk re-creation
7780 if self.op.iallocator:
7781 assert not self.op.nodes
7782 assert not self.needed_locks[locking.LEVEL_NODE]
7783 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7785 # Lock member nodes of the group of the primary node
7786 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7787 self.needed_locks[locking.LEVEL_NODE].extend(
7788 self.cfg.GetNodeGroup(group_uuid).members)
7790 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7791 elif not self.op.nodes:
7792 self._LockInstancesNodes(primary_only=False)
7793 elif level == locking.LEVEL_NODE_RES:
7795 self.needed_locks[locking.LEVEL_NODE_RES] = \
7796 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7798 def BuildHooksEnv(self):
7799 """Build hooks env.
7801 This runs on master, primary and secondary nodes of the instance.
7803 """
7804 return _BuildInstanceHookEnvByObject(self, self.instance)
7806 def BuildHooksNodes(self):
7807 """Build hooks nodes.
7810 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7813 def CheckPrereq(self):
7814 """Check prerequisites.
7816 This checks that the instance is in the cluster and is not running.
7819 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7820 assert instance is not None, \
7821 "Cannot retrieve locked instance %s" % self.op.instance_name
7823 if len(self.op.nodes) != len(instance.all_nodes):
7824 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7825 " %d replacement nodes were specified" %
7826 (instance.name, len(instance.all_nodes),
7827 len(self.op.nodes)),
7829 assert instance.disk_template != constants.DT_DRBD8 or \
7830 len(self.op.nodes) == 2
7831 assert instance.disk_template != constants.DT_PLAIN or \
7832 len(self.op.nodes) == 1
7833 primary_node = self.op.nodes[0]
7835 primary_node = instance.primary_node
7836 if not self.op.iallocator:
7837 _CheckNodeOnline(self, primary_node)
7839 if instance.disk_template == constants.DT_DISKLESS:
7840 raise errors.OpPrereqError("Instance '%s' has no disks" %
7841 self.op.instance_name, errors.ECODE_INVAL)
7843 # Verify if node group locks are still correct
7844 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7845 if owned_groups:
7846 # Node group locks are acquired only for the primary node (and only
7847 # when the allocator is used)
7848 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7849 primary_only=True)
7851 # if we replace nodes *and* the old primary is offline, we don't
7852 # check the instance state
7853 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7854 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7855 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7856 msg="cannot recreate disks")
7858 if self.op.disks:
7859 self.disks = dict(self.op.disks)
7860 else:
7861 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7863 maxidx = max(self.disks.keys())
7864 if maxidx >= len(instance.disks):
7865 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7866 errors.ECODE_PARAMS)
7868 if ((self.op.nodes or self.op.iallocator) and
7869 sorted(self.disks.keys()) != range(len(instance.disks))):
7870 raise errors.OpPrereqError("Can't recreate disks partially and"
7871 " change the nodes at the same time",
7874 self.instance = instance
7876 if self.op.iallocator:
7877 self._RunAllocator()
7878 # Release unneeded node and node resource locks
7879 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7880 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7881 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7883 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7885 def Exec(self, feedback_fn):
7886 """Recreate the disks.
7889 instance = self.instance
7891 assert (self.owned_locks(locking.LEVEL_NODE) ==
7892 self.owned_locks(locking.LEVEL_NODE_RES))
7894 to_skip = []
7895 mods = [] # keeps track of needed changes
7897 for idx, disk in enumerate(instance.disks):
7898 try:
7899 changes = self.disks[idx]
7900 except KeyError:
7901 # Disk should not be recreated
7902 to_skip.append(idx)
7903 continue
7905 # update secondaries for disks, if needed
7906 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7907 # need to update the nodes and minors
7908 assert len(self.op.nodes) == 2
7909 assert len(disk.logical_id) == 6 # otherwise disk internals
7911 (_, _, old_port, _, _, old_secret) = disk.logical_id
7912 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7913 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7914 new_minors[0], new_minors[1], old_secret)
7915 assert len(disk.logical_id) == len(new_id)
7916 else:
7917 new_id = None
7919 mods.append((idx, new_id, changes))
7921 # now that we have passed all asserts above, we can apply the mods
7922 # in a single run (to avoid partial changes)
7923 for idx, new_id, changes in mods:
7924 disk = instance.disks[idx]
7925 if new_id is not None:
7926 assert disk.dev_type == constants.LD_DRBD8
7927 disk.logical_id = new_id
7928 if changes:
7929 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7930 mode=changes.get(constants.IDISK_MODE, None))
7932 # change primary node, if needed
7933 if self.op.nodes:
7934 instance.primary_node = self.op.nodes[0]
7935 self.LogWarning("Changing the instance's nodes, you will have to"
7936 " remove any disks left on the older nodes manually")
7938 if self.op.nodes:
7939 self.cfg.Update(instance, feedback_fn)
7941 # All touched nodes must be locked
7942 mylocks = self.owned_locks(locking.LEVEL_NODE)
7943 assert mylocks.issuperset(frozenset(instance.all_nodes))
7944 _CreateDisks(self, instance, to_skip=to_skip)
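# For reference, the DRBD8 logical_id rewritten above is the 6-tuple
#
#   (node_a, node_b, port, minor_a, minor_b, secret)
#
# and recreation only swaps the nodes and allocates fresh minors; the TCP
# port and shared secret are deliberately carried over into new_id.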
7947 class LUInstanceRename(LogicalUnit):
7948 """Rename an instance.
7951 HPATH = "instance-rename"
7952 HTYPE = constants.HTYPE_INSTANCE
7954 def CheckArguments(self):
7958 if self.op.ip_check and not self.op.name_check:
7959 # TODO: make the ip check more flexible and not depend on the name check
7960 raise errors.OpPrereqError("IP address check requires a name check",
7961 errors.ECODE_INVAL)
7963 def BuildHooksEnv(self):
7964 """Build hooks env.
7966 This runs on master, primary and secondary nodes of the instance.
7968 """
7969 env = _BuildInstanceHookEnvByObject(self, self.instance)
7970 env["INSTANCE_NEW_NAME"] = self.op.new_name
7972 return env
7973 def BuildHooksNodes(self):
7974 """Build hooks nodes.
7977 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7980 def CheckPrereq(self):
7981 """Check prerequisites.
7983 This checks that the instance is in the cluster and is not running.
7986 self.op.instance_name = _ExpandInstanceName(self.cfg,
7987 self.op.instance_name)
7988 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7989 assert instance is not None
7990 _CheckNodeOnline(self, instance.primary_node)
7991 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7992 msg="cannot rename")
7993 self.instance = instance
7995 new_name = self.op.new_name
7996 if self.op.name_check:
7997 hostname = _CheckHostnameSane(self, new_name)
7998 new_name = self.op.new_name = hostname.name
7999 if (self.op.ip_check and
8000 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
8001 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8002 (hostname.ip, new_name),
8003 errors.ECODE_NOTUNIQUE)
8005 instance_list = self.cfg.GetInstanceList()
8006 if new_name in instance_list and new_name != instance.name:
8007 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8008 new_name, errors.ECODE_EXISTS)
8010 def Exec(self, feedback_fn):
8011 """Rename the instance.
8014 inst = self.instance
8015 old_name = inst.name
8017 rename_file_storage = False
8018 if (inst.disk_template in constants.DTS_FILEBASED and
8019 self.op.new_name != inst.name):
8020 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
8021 rename_file_storage = True
8023 self.cfg.RenameInstance(inst.name, self.op.new_name)
8024 # Change the instance lock. This is definitely safe while we hold the BGL.
8025 # Otherwise the new lock would have to be added in acquired mode.
8026 assert self.REQ_BGL
8027 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
8028 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
8029 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
8031 # re-read the instance from the configuration after rename
8032 inst = self.cfg.GetInstanceInfo(self.op.new_name)
8034 if rename_file_storage:
8035 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
8036 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
8037 old_file_storage_dir,
8038 new_file_storage_dir)
8039 result.Raise("Could not rename on node %s directory '%s' to '%s'"
8040 " (but the instance has been renamed in Ganeti)" %
8041 (inst.primary_node, old_file_storage_dir,
8042 new_file_storage_dir))
8044 _StartInstanceDisks(self, inst, None)
8045 # update info on disks
8046 info = _GetInstanceInfoText(inst)
8047 for (idx, disk) in enumerate(inst.disks):
8048 for node in inst.all_nodes:
8049 self.cfg.SetDiskID(disk, node)
8050 result = self.rpc.call_blockdev_setinfo(node, disk, info)
8051 if result.fail_msg:
8052 self.LogWarning("Error setting info on node %s for disk %s: %s",
8053 node, idx, result.fail_msg)
8054 try:
8055 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
8056 old_name, self.op.debug_level)
8057 msg = result.fail_msg
8058 if msg:
8059 msg = ("Could not run OS rename script for instance %s on node %s"
8060 " (but the instance has been renamed in Ganeti): %s" %
8061 (inst.name, inst.primary_node, msg))
8062 self.LogWarning(msg)
8063 finally:
8064 _ShutdownInstanceDisks(self, inst)
8066 return inst.name
8069 class LUInstanceRemove(LogicalUnit):
8070 """Remove an instance.
8073 HPATH = "instance-remove"
8074 HTYPE = constants.HTYPE_INSTANCE
8075 REQ_BGL = False
8077 def ExpandNames(self):
8078 self._ExpandAndLockInstance()
8079 self.needed_locks[locking.LEVEL_NODE] = []
8080 self.needed_locks[locking.LEVEL_NODE_RES] = []
8081 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8083 def DeclareLocks(self, level):
8084 if level == locking.LEVEL_NODE:
8085 self._LockInstancesNodes()
8086 elif level == locking.LEVEL_NODE_RES:
8088 self.needed_locks[locking.LEVEL_NODE_RES] = \
8089 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8091 def BuildHooksEnv(self):
8092 """Build hooks env.
8094 This runs on master, primary and secondary nodes of the instance.
8096 """
8097 env = _BuildInstanceHookEnvByObject(self, self.instance)
8098 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8100 return env
8101 def BuildHooksNodes(self):
8102 """Build hooks nodes.
8105 nl = [self.cfg.GetMasterNode()]
8106 nl_post = list(self.instance.all_nodes) + nl
8107 return (nl, nl_post)
8109 def CheckPrereq(self):
8110 """Check prerequisites.
8112 This checks that the instance is in the cluster.
8115 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8116 assert self.instance is not None, \
8117 "Cannot retrieve locked instance %s" % self.op.instance_name
8119 def Exec(self, feedback_fn):
8120 """Remove the instance.
8123 instance = self.instance
8124 logging.info("Shutting down instance %s on node %s",
8125 instance.name, instance.primary_node)
8127 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8128 self.op.shutdown_timeout)
8129 msg = result.fail_msg
8130 if msg:
8131 if self.op.ignore_failures:
8132 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8133 else:
8134 raise errors.OpExecError("Could not shutdown instance %s on"
8135 " node %s: %s" %
8136 (instance.name, instance.primary_node, msg))
8138 assert (self.owned_locks(locking.LEVEL_NODE) ==
8139 self.owned_locks(locking.LEVEL_NODE_RES))
8140 assert not (set(instance.all_nodes) -
8141 self.owned_locks(locking.LEVEL_NODE)), \
8142 "Not owning correct locks"
8144 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8147 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8148 """Utility function to remove an instance.
8151 logging.info("Removing block devices for instance %s", instance.name)
8153 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8154 if not ignore_failures:
8155 raise errors.OpExecError("Can't remove instance's disks")
8156 feedback_fn("Warning: can't remove instance's disks")
8158 logging.info("Removing instance %s out of cluster config", instance.name)
8160 lu.cfg.RemoveInstance(instance.name)
8162 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8163 "Instance lock removal conflict"
8165 # Remove lock for the instance
8166 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8169 class LUInstanceQuery(NoHooksLU):
8170 """Logical unit for querying instances.
8173 # pylint: disable=W0142
8174 REQ_BGL = False
8176 def CheckArguments(self):
8177 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8178 self.op.output_fields, self.op.use_locking)
8180 def ExpandNames(self):
8181 self.iq.ExpandNames(self)
8183 def DeclareLocks(self, level):
8184 self.iq.DeclareLocks(self, level)
8186 def Exec(self, feedback_fn):
8187 return self.iq.OldStyleQuery(self)
8190 def _ExpandNamesForMigration(lu):
8191 """Expands names for use with L{TLMigrateInstance}.
8193 @type lu: L{LogicalUnit}
8196 if lu.op.target_node is not None:
8197 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8199 lu.needed_locks[locking.LEVEL_NODE] = []
8200 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8202 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8203 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8205 # The node allocation lock is actually only needed for externally replicated
8206 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
8207 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
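# After this helper runs, the calling LU's lock declaration looks roughly
# like the following (the instance lock itself is taken separately by
# _ExpandAndLockInstance in the caller):
#
#   lu.needed_locks = {
#     locking.LEVEL_NODE: [],         # filled in later by DeclareLocks
#     locking.LEVEL_NODE_RES: [],
#     locking.LEVEL_NODE_ALLOC: [],   # widened only for DTS_EXT_MIRROR
#     }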
8210 def _DeclareLocksForMigration(lu, level):
8211 """Declares locks for L{TLMigrateInstance}.
8213 @type lu: L{LogicalUnit}
8214 @param level: Lock level
8217 if level == locking.LEVEL_NODE_ALLOC:
8218 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8220 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8222 # Node locks are already declared here rather than at LEVEL_NODE as we need
8223 # the instance object anyway to declare the node allocation lock.
8224 if instance.disk_template in constants.DTS_EXT_MIRROR:
8225 if lu.op.target_node is None:
8226 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8227 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8228 else:
8229 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8230 lu.op.target_node]
8231 del lu.recalculate_locks[locking.LEVEL_NODE]
8232 else:
8233 lu._LockInstancesNodes() # pylint: disable=W0212
8235 elif level == locking.LEVEL_NODE:
8236 # Node locks are declared together with the node allocation lock
8237 assert (lu.needed_locks[locking.LEVEL_NODE] or
8238 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8240 elif level == locking.LEVEL_NODE_RES:
8242 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8243 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8246 class LUInstanceFailover(LogicalUnit):
8247 """Failover an instance.
8250 HPATH = "instance-failover"
8251 HTYPE = constants.HTYPE_INSTANCE
8252 REQ_BGL = False
8254 def CheckArguments(self):
8255 """Check the arguments.
8258 self.iallocator = getattr(self.op, "iallocator", None)
8259 self.target_node = getattr(self.op, "target_node", None)
8261 def ExpandNames(self):
8262 self._ExpandAndLockInstance()
8263 _ExpandNamesForMigration(self)
8265 self._migrater = \
8266 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8267 self.op.ignore_consistency, True,
8268 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8270 self.tasklets = [self._migrater]
8272 def DeclareLocks(self, level):
8273 _DeclareLocksForMigration(self, level)
8275 def BuildHooksEnv(self):
8276 """Build hooks env.
8278 This runs on master, primary and secondary nodes of the instance.
8280 """
8281 instance = self._migrater.instance
8282 source_node = instance.primary_node
8283 target_node = self.op.target_node
8284 env = {
8285 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8286 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8287 "OLD_PRIMARY": source_node,
8288 "NEW_PRIMARY": target_node,
8289 }
8291 if instance.disk_template in constants.DTS_INT_MIRROR:
8292 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8293 env["NEW_SECONDARY"] = source_node
8294 else:
8295 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8297 env.update(_BuildInstanceHookEnvByObject(self, instance))
8299 return env
8301 def BuildHooksNodes(self):
8302 """Build hooks nodes.
8305 instance = self._migrater.instance
8306 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8307 return (nl, nl + [instance.primary_node])
8310 class LUInstanceMigrate(LogicalUnit):
8311 """Migrate an instance.
8313 This is migration without shutting down, compared to the failover,
8314 which is done with shutdown.
8317 HPATH = "instance-migrate"
8318 HTYPE = constants.HTYPE_INSTANCE
8319 REQ_BGL = False
8321 def ExpandNames(self):
8322 self._ExpandAndLockInstance()
8323 _ExpandNamesForMigration(self)
8325 self._migrater = \
8326 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8327 False, self.op.allow_failover, False,
8328 self.op.allow_runtime_changes,
8329 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8330 self.op.ignore_ipolicy)
8332 self.tasklets = [self._migrater]
8334 def DeclareLocks(self, level):
8335 _DeclareLocksForMigration(self, level)
8337 def BuildHooksEnv(self):
8338 """Build hooks env.
8340 This runs on master, primary and secondary nodes of the instance.
8342 """
8343 instance = self._migrater.instance
8344 source_node = instance.primary_node
8345 target_node = self.op.target_node
8346 env = _BuildInstanceHookEnvByObject(self, instance)
8347 env.update({
8348 "MIGRATE_LIVE": self._migrater.live,
8349 "MIGRATE_CLEANUP": self.op.cleanup,
8350 "OLD_PRIMARY": source_node,
8351 "NEW_PRIMARY": target_node,
8352 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8353 })
8355 if instance.disk_template in constants.DTS_INT_MIRROR:
8356 env["OLD_SECONDARY"] = target_node
8357 env["NEW_SECONDARY"] = source_node
8358 else:
8359 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8361 return env
8363 def BuildHooksNodes(self):
8364 """Build hooks nodes.
8367 instance = self._migrater.instance
8368 snodes = list(instance.secondary_nodes)
8369 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8370 return (nl, nl)
8373 class LUInstanceMove(LogicalUnit):
8374 """Move an instance by data-copying.
8377 HPATH = "instance-move"
8378 HTYPE = constants.HTYPE_INSTANCE
8379 REQ_BGL = False
8381 def ExpandNames(self):
8382 self._ExpandAndLockInstance()
8383 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8384 self.op.target_node = target_node
8385 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8386 self.needed_locks[locking.LEVEL_NODE_RES] = []
8387 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8389 def DeclareLocks(self, level):
8390 if level == locking.LEVEL_NODE:
8391 self._LockInstancesNodes(primary_only=True)
8392 elif level == locking.LEVEL_NODE_RES:
8394 self.needed_locks[locking.LEVEL_NODE_RES] = \
8395 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8397 def BuildHooksEnv(self):
8398 """Build hooks env.
8400 This runs on master, primary and secondary nodes of the instance.
8402 """
8403 env = {
8404 "TARGET_NODE": self.op.target_node,
8405 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8406 }
8407 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8409 return env
8410 def BuildHooksNodes(self):
8411 """Build hooks nodes.
8415 self.cfg.GetMasterNode(),
8416 self.instance.primary_node,
8417 self.op.target_node,
8421 def CheckPrereq(self):
8422 """Check prerequisites.
8424 This checks that the instance is in the cluster.
8427 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8428 assert self.instance is not None, \
8429 "Cannot retrieve locked instance %s" % self.op.instance_name
8431 node = self.cfg.GetNodeInfo(self.op.target_node)
8432 assert node is not None, \
8433 "Cannot retrieve locked node %s" % self.op.target_node
8435 self.target_node = target_node = node.name
8437 if target_node == instance.primary_node:
8438 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8439 (instance.name, target_node),
8440 errors.ECODE_STATE)
8442 bep = self.cfg.GetClusterInfo().FillBE(instance)
8444 for idx, dsk in enumerate(instance.disks):
8445 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8446 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8447 " cannot copy" % idx, errors.ECODE_STATE)
8449 _CheckNodeOnline(self, target_node)
8450 _CheckNodeNotDrained(self, target_node)
8451 _CheckNodeVmCapable(self, target_node)
8452 cluster = self.cfg.GetClusterInfo()
8453 group_info = self.cfg.GetNodeGroup(node.group)
8454 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8455 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
8456 ignore=self.op.ignore_ipolicy)
8458 if instance.admin_state == constants.ADMINST_UP:
8459 # check memory requirements on the secondary node
8460 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8461 instance.name, bep[constants.BE_MAXMEM],
8462 instance.hypervisor)
8463 else:
8464 self.LogInfo("Not checking memory on the secondary node as"
8465 " instance will not be started")
8467 # check bridge existence
8468 _CheckInstanceBridgesExist(self, instance, node=target_node)
8470 def Exec(self, feedback_fn):
8471 """Move an instance.
8473 The move is done by shutting it down on its present node, copying
8474 the data over (slow) and starting it on the new node.
8477 instance = self.instance
8479 source_node = instance.primary_node
8480 target_node = self.target_node
8482 self.LogInfo("Shutting down instance %s on source node %s",
8483 instance.name, source_node)
8485 assert (self.owned_locks(locking.LEVEL_NODE) ==
8486 self.owned_locks(locking.LEVEL_NODE_RES))
8488 result = self.rpc.call_instance_shutdown(source_node, instance,
8489 self.op.shutdown_timeout)
8490 msg = result.fail_msg
8491 if msg:
8492 if self.op.ignore_consistency:
8493 self.LogWarning("Could not shutdown instance %s on node %s."
8494 " Proceeding anyway. Please make sure node"
8495 " %s is down. Error details: %s",
8496 instance.name, source_node, source_node, msg)
8497 else:
8498 raise errors.OpExecError("Could not shutdown instance %s on"
8499 " node %s: %s" %
8500 (instance.name, source_node, msg))
8502 # create the target disks
8503 try:
8504 _CreateDisks(self, instance, target_node=target_node)
8505 except errors.OpExecError:
8506 self.LogWarning("Device creation failed, reverting...")
8507 try:
8508 _RemoveDisks(self, instance, target_node=target_node)
8509 finally:
8510 self.cfg.ReleaseDRBDMinors(instance.name)
8511 raise
8513 cluster_name = self.cfg.GetClusterInfo().cluster_name
8515 errs = []
8516 # activate, get path, copy the data over
8517 for idx, disk in enumerate(instance.disks):
8518 self.LogInfo("Copying data for disk %d", idx)
8519 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8520 instance.name, True, idx)
8521 if result.fail_msg:
8522 self.LogWarning("Can't assemble newly created disk %d: %s",
8523 idx, result.fail_msg)
8524 errs.append(result.fail_msg)
8525 break
8526 dev_path = result.payload
8527 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8528 target_node, dev_path,
8529 cluster_name)
8530 if result.fail_msg:
8531 self.LogWarning("Can't copy data over for disk %d: %s",
8532 idx, result.fail_msg)
8533 errs.append(result.fail_msg)
8534 break
8536 if errs:
8537 self.LogWarning("Some disks failed to copy, aborting")
8538 try:
8539 _RemoveDisks(self, instance, target_node=target_node)
8540 finally:
8541 self.cfg.ReleaseDRBDMinors(instance.name)
8542 raise errors.OpExecError("Errors during disk copy: %s" %
8543 (",".join(errs),))
8545 instance.primary_node = target_node
8546 self.cfg.Update(instance, feedback_fn)
8548 self.LogInfo("Removing the disks on the original node")
8549 _RemoveDisks(self, instance, target_node=source_node)
8551 # Only start the instance if it's marked as up
8552 if instance.admin_state == constants.ADMINST_UP:
8553 self.LogInfo("Starting instance %s on node %s",
8554 instance.name, target_node)
8556 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8557 ignore_secondaries=True)
8558 if not disks_ok:
8559 _ShutdownInstanceDisks(self, instance)
8560 raise errors.OpExecError("Can't activate the instance's disks")
8562 result = self.rpc.call_instance_start(target_node,
8563 (instance, None, None), False)
8564 msg = result.fail_msg
8565 if msg:
8566 _ShutdownInstanceDisks(self, instance)
8567 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8568 (instance.name, target_node, msg))
8571 class LUNodeMigrate(LogicalUnit):
8572 """Migrate all instances from a node.
8575 HPATH = "node-migrate"
8576 HTYPE = constants.HTYPE_NODE
8577 REQ_BGL = False
8579 def CheckArguments(self):
8580 pass
8582 def ExpandNames(self):
8583 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8585 self.share_locks = _ShareAll()
8586 self.needed_locks = {
8587 locking.LEVEL_NODE: [self.op.node_name],
8588 }
8590 def BuildHooksEnv(self):
8591 """Build hooks env.
8593 This runs on the master, the primary and all the secondaries.
8595 """
8596 return {
8597 "NODE_NAME": self.op.node_name,
8598 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8599 }
8601 def BuildHooksNodes(self):
8602 """Build hooks nodes.
8605 nl = [self.cfg.GetMasterNode()]
8608 def CheckPrereq(self):
8609 pass
8611 def Exec(self, feedback_fn):
8612 # Prepare jobs for migration instances
8613 allow_runtime_changes = self.op.allow_runtime_changes
8614 jobs = [
8615 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8616 mode=self.op.mode,
8617 live=self.op.live,
8618 iallocator=self.op.iallocator,
8619 target_node=self.op.target_node,
8620 allow_runtime_changes=allow_runtime_changes,
8621 ignore_ipolicy=self.op.ignore_ipolicy)]
8622 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8624 # TODO: Run iallocator in this opcode and pass correct placement options to
8625 # OpInstanceMigrate. Since other jobs can modify the cluster between
8626 # running the iallocator and the actual migration, a good consistency model
8627 # will have to be found.
8629 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8630 frozenset([self.op.node_name]))
8632 return ResultWithJobs(jobs)
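# The value built above is a list of single-opcode jobs, one per primary
# instance on the node; for two instances it looks roughly like:
#
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="a", ...)],
#           [opcodes.OpInstanceMigrate(instance_name="b", ...)]]
#
# so every instance migrates as an independent job and one failure does not
# abort the others.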
8635 class TLMigrateInstance(Tasklet):
8636 """Tasklet class for instance migration.
8639 @ivar live: whether the migration will be done live or non-live;
8640 this variable is initialized only after CheckPrereq has run
8641 @type cleanup: boolean
8642 @ivar cleanup: Whether we cleanup from a failed migration
8643 @type iallocator: string
8644 @ivar iallocator: The iallocator used to determine target_node
8645 @type target_node: string
8646 @ivar target_node: If given, the target_node to reallocate the instance to
8647 @type failover: boolean
8648 @ivar failover: Whether operation results in failover or migration
8649 @type fallback: boolean
8650 @ivar fallback: Whether fallback to failover is allowed if migration not
8651 possible
8652 @type ignore_consistency: boolean
8653 @ivar ignore_consistency: Whether we should ignore consistency between source
8654 and target node
8655 @type shutdown_timeout: int
8656 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8657 @type ignore_ipolicy: bool
8658 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8663 _MIGRATION_POLL_INTERVAL = 1 # seconds
8664 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8666 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8667 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8668 ignore_ipolicy):
8669 """Initializes this class.
8672 Tasklet.__init__(self, lu)
8675 self.instance_name = instance_name
8676 self.cleanup = cleanup
8677 self.live = False # will be overridden later
8678 self.failover = failover
8679 self.fallback = fallback
8680 self.ignore_consistency = ignore_consistency
8681 self.shutdown_timeout = shutdown_timeout
8682 self.ignore_ipolicy = ignore_ipolicy
8683 self.allow_runtime_changes = allow_runtime_changes
8685 def CheckPrereq(self):
8686 """Check prerequisites.
8688 This checks that the instance is in the cluster.
8691 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8692 instance = self.cfg.GetInstanceInfo(instance_name)
8693 assert instance is not None
8694 self.instance = instance
8695 cluster = self.cfg.GetClusterInfo()
8697 if (not self.cleanup and
8698 not instance.admin_state == constants.ADMINST_UP and
8699 not self.failover and self.fallback):
8700 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8701 " switching to failover")
8702 self.failover = True
8704 if instance.disk_template not in constants.DTS_MIRRORED:
8705 if self.failover:
8706 text = "failovers"
8707 else:
8708 text = "migrations"
8709 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8710 " %s" % (instance.disk_template, text),
8711 errors.ECODE_STATE)
8713 if instance.disk_template in constants.DTS_EXT_MIRROR:
8714 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8716 if self.lu.op.iallocator:
8717 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8718 self._RunAllocator()
8719 else:
8720 # We set self.target_node as it is required by
8722 self.target_node = self.lu.op.target_node
8724 # Check that the target node is correct in terms of instance policy
8725 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8726 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8727 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8728 group_info)
8729 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8730 ignore=self.ignore_ipolicy)
8732 # self.target_node is already populated, either directly or by the
8734 target_node = self.target_node
8735 if self.target_node == instance.primary_node:
8736 raise errors.OpPrereqError("Cannot migrate instance %s"
8737 " to its primary (%s)" %
8738 (instance.name, instance.primary_node),
8739 errors.ECODE_STATE)
8741 if len(self.lu.tasklets) == 1:
8742 # It is safe to release locks only when we're the only tasklet
8744 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8745 keep=[instance.primary_node, self.target_node])
8746 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8748 else:
8749 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8751 secondary_nodes = instance.secondary_nodes
8752 if not secondary_nodes:
8753 raise errors.ConfigurationError("No secondary node but using"
8754 " %s disk template" %
8755 instance.disk_template)
8756 target_node = secondary_nodes[0]
8757 if self.lu.op.iallocator or (self.lu.op.target_node and
8758 self.lu.op.target_node != target_node):
8759 if self.failover:
8760 text = "failed over"
8761 else:
8762 text = "migrated"
8763 raise errors.OpPrereqError("Instances with disk template %s cannot"
8764 " be %s to arbitrary nodes"
8765 " (neither an iallocator nor a target"
8766 " node can be passed)" %
8767 (instance.disk_template, text),
8768 errors.ECODE_INVAL)
8769 nodeinfo = self.cfg.GetNodeInfo(target_node)
8770 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8771 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8772 group_info)
8773 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8774 ignore=self.ignore_ipolicy)
8776 i_be = cluster.FillBE(instance)
8778 # check memory requirements on the secondary node
8779 if (not self.cleanup and
8780 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8781 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8782 "migrating instance %s" %
8784 i_be[constants.BE_MINMEM],
8785 instance.hypervisor)
8787 self.lu.LogInfo("Not checking memory on the secondary node as"
8788 " instance will not be started")
8790 # check if failover must be forced instead of migration
8791 if (not self.cleanup and not self.failover and
8792 i_be[constants.BE_ALWAYS_FAILOVER]):
8793 self.lu.LogInfo("Instance configured to always failover; fallback"
8794 " to failover")
8795 self.failover = True
8797 # check bridge existence
8798 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8800 if not self.cleanup:
8801 _CheckNodeNotDrained(self.lu, target_node)
8802 if not self.failover:
8803 result = self.rpc.call_instance_migratable(instance.primary_node,
8804 instance)
8805 if result.fail_msg and self.fallback:
8806 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8807 " failover")
8808 self.failover = True
8809 else:
8810 result.Raise("Can't migrate, please use failover",
8811 prereq=True, ecode=errors.ECODE_STATE)
8813 assert not (self.failover and self.cleanup)
8815 if not self.failover:
8816 if self.lu.op.live is not None and self.lu.op.mode is not None:
8817 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8818 " parameters are accepted",
8820 if self.lu.op.live is not None:
8822 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8824 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8825 # reset the 'live' parameter to None so that repeated
8826 # invocations of CheckPrereq do not raise an exception
8827 self.lu.op.live = None
8828 elif self.lu.op.mode is None:
8829 # read the default value from the hypervisor
8830 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8831 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8833 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8834 else:
8835 # Failover is never live
8836 self.live = False
8838 if not (self.failover or self.cleanup):
8839 remote_info = self.rpc.call_instance_info(instance.primary_node,
8840 instance.name,
8841 instance.hypervisor)
8842 remote_info.Raise("Error checking instance on node %s" %
8843 instance.primary_node)
8844 instance_running = bool(remote_info.payload)
8845 if instance_running:
8846 self.current_mem = int(remote_info.payload["memory"])
8848 def _RunAllocator(self):
8849 """Run the allocator based on input opcode.
8852 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8854 # FIXME: add a self.ignore_ipolicy option
8855 req = iallocator.IAReqRelocate(name=self.instance_name,
8856 relocate_from=[self.instance.primary_node])
8857 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8859 ial.Run(self.lu.op.iallocator)
8861 if not ial.success:
8862 raise errors.OpPrereqError("Can't compute nodes using"
8863 " iallocator '%s': %s" %
8864 (self.lu.op.iallocator, ial.info),
8865 errors.ECODE_NORES)
8866 self.target_node = ial.result[0]
8867 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8868 self.instance_name, self.lu.op.iallocator,
8869 utils.CommaJoin(ial.result))
8871 def _WaitUntilSync(self):
8872 """Poll with custom rpc for disk sync.
8874 This uses our own step-based rpc call.
8877 self.feedback_fn("* wait until resync is done")
8878 all_done = False
8879 while not all_done:
8880 all_done = True
8881 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8882 self.nodes_ip,
8883 (self.instance.disks,
8884 self.instance))
8885 min_percent = 100
8886 for node, nres in result.items():
8887 nres.Raise("Cannot resync disks on node %s" % node)
8888 node_done, node_percent = nres.payload
8889 all_done = all_done and node_done
8890 if node_percent is not None:
8891 min_percent = min(min_percent, node_percent)
8892 if not all_done:
8893 if min_percent < 100:
8894 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8895 time.sleep(2)
8897 def _EnsureSecondary(self, node):
8898 """Demote a node to secondary.
8901 self.feedback_fn("* switching node %s to secondary mode" % node)
8903 for dev in self.instance.disks:
8904 self.cfg.SetDiskID(dev, node)
8906 result = self.rpc.call_blockdev_close(node, self.instance.name,
8907 self.instance.disks)
8908 result.Raise("Cannot change disk to secondary on node %s" % node)
8910 def _GoStandalone(self):
8911 """Disconnect from the network.
8914 self.feedback_fn("* changing into standalone mode")
8915 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8916 self.instance.disks)
8917 for node, nres in result.items():
8918 nres.Raise("Cannot disconnect disks node %s" % node)
8920 def _GoReconnect(self, multimaster):
8921 """Reconnect to the network.
8927 msg = "single-master"
8928 self.feedback_fn("* changing disks into %s mode" % msg)
8929 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8930 (self.instance.disks, self.instance),
8931 self.instance.name, multimaster)
8932 for node, nres in result.items():
8933 nres.Raise("Cannot change disks config on node %s" % node)
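# The three helpers above implement the DRBD reconfiguration sequence used
# by live migration; the typical order (see _ExecMigration below) is:
#
#   self._EnsureSecondary(target_node)   # demote the target's disks
#   self._GoStandalone()                 # disconnect both sides
#   self._GoReconnect(True)              # reconnect in dual-master mode
#   self._WaitUntilSync()                # wait until both sides are in sync
#
# with the mirror image (_GoReconnect(False)) once the migration is done.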
8935 def _ExecCleanup(self):
8936 """Try to cleanup after a failed migration.
8938 The cleanup is done by:
8939 - check that the instance is running only on one node
8940 (and update the config if needed)
8941 - change disks on its secondary node to secondary
8942 - wait until disks are fully synchronized
8943 - disconnect from the network
8944 - change disks into single-master mode
8945 - wait again until disks are fully synchronized
8948 instance = self.instance
8949 target_node = self.target_node
8950 source_node = self.source_node
8952 # check running on only one node
8953 self.feedback_fn("* checking where the instance actually runs"
8954 " (if this hangs, the hypervisor might be in"
8956 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8957 for node, result in ins_l.items():
8958 result.Raise("Can't contact node %s" % node)
8960 runningon_source = instance.name in ins_l[source_node].payload
8961 runningon_target = instance.name in ins_l[target_node].payload
8963 if runningon_source and runningon_target:
8964 raise errors.OpExecError("Instance seems to be running on two nodes,"
8965 " or the hypervisor is confused; you will have"
8966 " to ensure manually that it runs only on one"
8967 " and restart this operation")
8969 if not (runningon_source or runningon_target):
8970 raise errors.OpExecError("Instance does not seem to be running at all;"
8971 " in this case it's safer to repair by"
8972 " running 'gnt-instance stop' to ensure disk"
8973 " shutdown, and then restarting it")
8975 if runningon_target:
8976 # the migration has actually succeeded, we need to update the config
8977 self.feedback_fn("* instance running on secondary node (%s),"
8978 " updating config" % target_node)
8979 instance.primary_node = target_node
8980 self.cfg.Update(instance, self.feedback_fn)
8981 demoted_node = source_node
8982 else:
8983 self.feedback_fn("* instance confirmed to be running on its"
8984 " primary node (%s)" % source_node)
8985 demoted_node = target_node
8987 if instance.disk_template in constants.DTS_INT_MIRROR:
8988 self._EnsureSecondary(demoted_node)
8989 try:
8990 self._WaitUntilSync()
8991 except errors.OpExecError:
8992 # we ignore here errors, since if the device is standalone, it
8993 # won't be able to sync
8994 pass
8995 self._GoStandalone()
8996 self._GoReconnect(False)
8997 self._WaitUntilSync()
8999 self.feedback_fn("* done")
9001 def _RevertDiskStatus(self):
9002 """Try to revert the disk status after a failed migration.
9005 target_node = self.target_node
9006 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
9007 return
9009 try:
9010 self._EnsureSecondary(target_node)
9011 self._GoStandalone()
9012 self._GoReconnect(False)
9013 self._WaitUntilSync()
9014 except errors.OpExecError, err:
9015 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
9016 " please try to recover the instance manually;"
9017 " error '%s'" % str(err))
9019 def _AbortMigration(self):
9020 """Call the hypervisor code to abort a started migration.
9023 instance = self.instance
9024 target_node = self.target_node
9025 source_node = self.source_node
9026 migration_info = self.migration_info
9028 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
9029 instance,
9030 migration_info,
9031 False)
9032 abort_msg = abort_result.fail_msg
9033 if abort_msg:
9034 logging.error("Aborting migration failed on target node %s: %s",
9035 target_node, abort_msg)
9036 # Don't raise an exception here, as we still have to try to revert the
9037 # disk status, even if this step failed.
9039 abort_result = self.rpc.call_instance_finalize_migration_src(
9040 source_node, instance, False, self.live)
9041 abort_msg = abort_result.fail_msg
9042 if abort_msg:
9043 logging.error("Aborting migration failed on source node %s: %s",
9044 source_node, abort_msg)
9046 def _ExecMigration(self):
9047 """Migrate an instance.
9049 The migrate is done by:
9050 - change the disks into dual-master mode
9051 - wait until disks are fully synchronized again
9052 - migrate the instance
9053 - change disks on the new secondary node (the old primary) to secondary
9054 - wait until disks are fully synchronized
9055 - change disks into single-master mode
9058 instance = self.instance
9059 target_node = self.target_node
9060 source_node = self.source_node
9062 # Check for hypervisor version mismatch and warn the user.
9063 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9064 None, [self.instance.hypervisor], False)
9065 for ninfo in nodeinfo.values():
9066 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9067 ninfo.node)
9068 (_, _, (src_info, )) = nodeinfo[source_node].payload
9069 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9071 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9072 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9073 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9074 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9075 if src_version != dst_version:
9076 self.feedback_fn("* warning: hypervisor version mismatch between"
9077 " source (%s) and target (%s) node" %
9078 (src_version, dst_version))
9080 self.feedback_fn("* checking disk consistency between source and target")
9081 for (idx, dev) in enumerate(instance.disks):
9082 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9083 raise errors.OpExecError("Disk %s is degraded or not fully"
9084 " synchronized on target node,"
9085 " aborting migration" % idx)
9087 if self.current_mem > self.tgt_free_mem:
9088 if not self.allow_runtime_changes:
9089 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9090 " free memory to fit instance %s on target"
9091 " node %s (have %dMB, need %dMB)" %
9092 (instance.name, target_node,
9093 self.tgt_free_mem, self.current_mem))
9094 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9095 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9096 instance,
9097 self.tgt_free_mem)
9098 rpcres.Raise("Cannot modify instance runtime memory")
9100 # First get the migration information from the remote node
9101 result = self.rpc.call_migration_info(source_node, instance)
9102 msg = result.fail_msg
9103 if msg:
9104 log_err = ("Failed fetching source migration information from %s: %s" %
9105 (source_node, msg))
9106 logging.error(log_err)
9107 raise errors.OpExecError(log_err)
9109 self.migration_info = migration_info = result.payload
9111 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9112 # Then switch the disks to master/master mode
9113 self._EnsureSecondary(target_node)
9114 self._GoStandalone()
9115 self._GoReconnect(True)
9116 self._WaitUntilSync()
9118 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9119 result = self.rpc.call_accept_instance(target_node,
9120 instance,
9121 migration_info,
9122 self.nodes_ip[target_node])
9124 msg = result.fail_msg
9125 if msg:
9126 logging.error("Instance pre-migration failed, trying to revert"
9127 " disk status: %s", msg)
9128 self.feedback_fn("Pre-migration failed, aborting")
9129 self._AbortMigration()
9130 self._RevertDiskStatus()
9131 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9132 (instance.name, msg))
9134 self.feedback_fn("* migrating instance to %s" % target_node)
9135 result = self.rpc.call_instance_migrate(source_node, instance,
9136 self.nodes_ip[target_node],
9137 self.live)
9138 msg = result.fail_msg
9139 if msg:
9140 logging.error("Instance migration failed, trying to revert"
9141 " disk status: %s", msg)
9142 self.feedback_fn("Migration failed, aborting")
9143 self._AbortMigration()
9144 self._RevertDiskStatus()
9145 raise errors.OpExecError("Could not migrate instance %s: %s" %
9146 (instance.name, msg))
9148 self.feedback_fn("* starting memory transfer")
9149 last_feedback = time.time()
9150 while True:
9151 result = self.rpc.call_instance_get_migration_status(source_node,
9152 instance)
9153 msg = result.fail_msg
9154 ms = result.payload # MigrationStatus instance
9155 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9156 logging.error("Instance migration failed, trying to revert"
9157 " disk status: %s", msg)
9158 self.feedback_fn("Migration failed, aborting")
9159 self._AbortMigration()
9160 self._RevertDiskStatus()
9161 if not msg:
9162 msg = "hypervisor returned failure"
9163 raise errors.OpExecError("Could not migrate instance %s: %s" %
9164 (instance.name, msg))
9166 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9167 self.feedback_fn("* memory transfer complete")
9168 break
9170 if (utils.TimeoutExpired(last_feedback,
9171 self._MIGRATION_FEEDBACK_INTERVAL) and
9172 ms.transferred_ram is not None):
9173 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9174 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9175 last_feedback = time.time()
9177 time.sleep(self._MIGRATION_POLL_INTERVAL)
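# Worked example of the progress report above: with total_ram=4096 and
# transferred_ram=1024 (both in MiB), mem_progress is
# 100 * 1024.0 / 4096.0 = 25.0, printed at most every
# _MIGRATION_FEEDBACK_INTERVAL (10) seconds while the status is polled
# every _MIGRATION_POLL_INTERVAL (1) second.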
9179 result = self.rpc.call_instance_finalize_migration_src(source_node,
9180 instance,
9181 True,
9182 self.live)
9183 msg = result.fail_msg
9184 if msg:
9185 logging.error("Instance migration succeeded, but finalization failed"
9186 " on the source node: %s", msg)
9187 raise errors.OpExecError("Could not finalize instance migration: %s" %
9188 msg)
9190 instance.primary_node = target_node
9192 # distribute new instance config to the other nodes
9193 self.cfg.Update(instance, self.feedback_fn)
9195 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9196 instance,
9197 migration_info,
9198 True)
9199 msg = result.fail_msg
9200 if msg:
9201 logging.error("Instance migration succeeded, but finalization failed"
9202 " on the target node: %s", msg)
9203 raise errors.OpExecError("Could not finalize instance migration: %s" %
9204 msg)
9206 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9207 self._EnsureSecondary(source_node)
9208 self._WaitUntilSync()
9209 self._GoStandalone()
9210 self._GoReconnect(False)
9211 self._WaitUntilSync()
9213 # If the instance's disk template is `rbd' or `ext' and there was a
9214 # successful migration, unmap the device from the source node.
9215 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9216 disks = _ExpandCheckDisks(instance, instance.disks)
9217 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9218 for disk in disks:
9219 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9220 msg = result.fail_msg
9221 if msg:
9222 logging.error("Migration was successful, but couldn't unmap the"
9223 " block device %s on source node %s: %s",
9224 disk.iv_name, source_node, msg)
9225 logging.error("You need to unmap the device %s manually on %s",
9226 disk.iv_name, source_node)
9228 self.feedback_fn("* done")
9230 def _ExecFailover(self):
9231 """Failover an instance.
9233 The failover is done by shutting it down on its present node and
9234 starting it on the secondary.
9237 instance = self.instance
9238 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9240 source_node = instance.primary_node
9241 target_node = self.target_node
9243 if instance.admin_state == constants.ADMINST_UP:
9244 self.feedback_fn("* checking disk consistency between source and target")
9245 for (idx, dev) in enumerate(instance.disks):
9246 # for drbd, these are drbd over lvm
9247 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9248 False):
9249 if primary_node.offline:
9250 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9251 " target node %s" %
9252 (primary_node.name, idx, target_node))
9253 elif not self.ignore_consistency:
9254 raise errors.OpExecError("Disk %s is degraded on target node,"
9255 " aborting failover" % idx)
9257 self.feedback_fn("* not checking disk consistency as instance is not"

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
    # Otherwise self.target_node has been populated either
    # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Wrapper around L{_CreateBlockDevInner}.

  This method annotates the root device first.

  """
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
                              force_open, excl_stor)


def _CreateBlockDevInner(lu, node, instance, device, force_create,
                         info, force_open, excl_stor):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active for the node

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDevInner(lu, node, instance, child, force_create,
                           info, force_open, excl_stor)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
                        excl_stor)
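
# Illustrative note (not part of the original code): for a DRBD8 disk the
# recursion above creates the two LV children first and the DRBD device
# last, i.e. roughly:
#   _CreateBlockDevInner(drbd_dev)
#     -> _CreateBlockDevInner(dev_data) -> _CreateSingleBlockDev(dev_data)
#     -> _CreateBlockDevInner(dev_meta) -> _CreateSingleBlockDev(dev_meta)
#     -> _CreateSingleBlockDev(drbd_dev)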


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
                          excl_stor):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active for the node

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info,
                                       excl_stor)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))

  return results
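
# Hedged example (not from the original code): each name is the unique ID
# handed out by the configuration with the extension appended, so exts of
# [".disk0", ".disk1"] yield names along the lines of
# ["<unique-id>.disk0", "<unique-id>.disk1"].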


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
                          params={})
  dev_meta = objects.Disk(dev_type=constants.LD_LV,
                          size=constants.DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
                          params={})
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params={})
  return drbd_dev
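
# Resulting device tree (sketch): the DRBD8 device's logical_id carries
# (primary, secondary, port, p_minor, s_minor, shared_secret) and its two
# children are plain LVs:
#   drbd_dev (LD_DRBD8, size)
#     dev_data (LD_LV, size, vgnames[0]/names[0])
#     dev_meta (LD_LV, DRBD_META_SIZE, vgnames[1]/names[1])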


_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  constants.DT_EXT: ".ext",
  }


_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  constants.DT_EXT: constants.LD_EXT,
  }


def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  """
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
                                                       full_disk_params)
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    if template_name == constants.DT_PLAIN:

      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])

    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    elif template_name == constants.DT_EXT:
      def logical_id_fn(idx, _, disk):
        provider = disk.get(constants.IDISK_PROVIDER, None)
        if provider is None:
          raise errors.ProgrammerError("Disk template is %s, but '%s' is"
                                       " not found", constants.DT_EXT,
                                       constants.IDISK_PROVIDER)
        return (provider, names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" %
                                   template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      params = {}
      # Only for the Ext template add disk_info to params
      if template_name == constants.DT_EXT:
        params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
        for key in disk:
          if key not in constants.IDISK_PARAMS:
            params[key] = disk[key]
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=params))

  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
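
# For example, an instance named "web1.example.com" gets the metadata text
# "originstname+web1.example.com", which ends up as a tag on its LVs.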


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
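
# Worked example: after 120 seconds spent writing 512 MiB of a 2048 MiB
# disk, the average is 120 / 512.0 = 0.234375 s/MiB, so the estimate is
# (2048 - 512) * 0.234375 = 360.0 seconds remaining.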


def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @type disks: None or list of tuple of (number, L{objects.Disk}, number)
  @param disks: Disk details; tuple contains disk index, disk object and the
    start offset

  """
  node = instance.primary_node

  if disks is None:
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
               instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
                                                   instance),
                                                  True)
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("Pausing synchronization of disk %s of instance '%s'"
                   " failed", idx, instance.name)

  try:
    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
      wipe_chunk_size = \
        int(min(constants.MAX_WIPE_CHUNK,
                device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
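
      # Illustrative numbers (assuming the shipped defaults of
      # MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB): a
      # 4096 MiB disk gives int(min(1024, 409.6)) = 409 MiB chunks, while
      # a 20480 MiB disk is capped at the 1024 MiB maximum.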

      size = device.size
      last_output = 0
      start_time = time.time()

      if offset == 0:
        info_text = ""
      else:
        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))
          last_output = now
  finally:
    logging.info("Resuming synchronization of disks for instance '%s'",
                 instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),
                                                     instance),
                                                    False)

    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                        " failed", idx, instance.name)


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
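
# Worked example (illustrative): for two DRBD8 disks of 1024 MiB and
# 2048 MiB in volume group "xenvg", _compute adds DRBD_META_SIZE (128 MiB)
# per disk, so the DT_DRBD8 entry maps "xenvg" to
# (1024 + 128) + (2048 + 128) = 3328 MiB.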


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The fully filled beparams
  @param node_whitelist: List of nodes which should appear as online to the
    allocator (unless the node is already marked offline)

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       tags=op.tags,
                                       os=op.os_type,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       disks=disks,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor,
                                       node_whitelist=node_whitelist)


def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Computes the nics.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param ec_id: Execution context ID

  @returns: The built up nics

  """
  nics = []
  for nic in op.nics:
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    net = nic.get(constants.INIC_NETWORK, None)
    link = nic.get(constants.NIC_LINK, None)
    ip = nic.get(constants.INIC_IP, None)

    if net is None or net.lower() == constants.VALUE_NONE:
      net = None
    else:
      if nic_mode_req is not None or link is not None:
        raise errors.OpPrereqError("If network is given, no mode or link"
                                   " is allowed to be passed",
                                   errors.ECODE_INVAL)

    # ip validity checks
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s)
      if ip.lower() == constants.NIC_IP_POOL:
        if net is None:
          raise errors.OpPrereqError("if ip=pool, parameter network"
                                     " must be passed too",
                                     errors.ECODE_INVAL)
      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link

    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    net_uuid = cfg.LookupNetwork(net)
    nics.append(objects.NIC(mac=mac, ip=nic_ip,
                            network=net_uuid, nicparams=nicparams))

  return nics
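
# Hedged usage sketch (not from the original code): a minimal opcode NIC
# list such as
#   op.nics = [{constants.INIC_MAC: constants.VALUE_AUTO}]
# comes back as a single objects.NIC with ip=None, empty nicparams (so the
# cluster defaults apply) and mac still "auto"; the real MAC is generated
# later, in LUInstanceCreate.CheckPrereq.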


def _ComputeDisks(op, default_vg):
  """Computes the instance disks.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume

  @return: The computed disks

  """
  disks = []
  for disk in op.disks:
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 mode, errors.ECODE_INVAL)
    size = disk.get(constants.IDISK_SIZE, None)
    if size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                 errors.ECODE_INVAL)

    ext_provider = disk.get(constants.IDISK_PROVIDER, None)
    if ext_provider and op.disk_template != constants.DT_EXT:
      raise errors.OpPrereqError("The '%s' option is only valid for the %s"
                                 " disk template, not %s" %
                                 (constants.IDISK_PROVIDER, constants.DT_EXT,
                                  op.disk_template), errors.ECODE_INVAL)

    data_vg = disk.get(constants.IDISK_VG, default_vg)
    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: data_vg,
      }

    if constants.IDISK_METAVG in disk:
      new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
    if constants.IDISK_ADOPT in disk:
      new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]

    # For extstorage, demand the `provider' option and add any
    # additional parameters (ext-params) to the dict
    if op.disk_template == constants.DT_EXT:
      if ext_provider:
        new_disk[constants.IDISK_PROVIDER] = ext_provider
        for key in disk:
          if key not in constants.IDISK_PARAMS:
            new_disk[key] = disk[key]
      else:
        raise errors.OpPrereqError("Missing provider for template '%s'" %
                                   constants.DT_EXT, errors.ECODE_INVAL)

    disks.append(new_disk)

  return disks
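
# Hedged usage sketch (not from the original code): a request such as
#   op.disks = [{constants.IDISK_SIZE: 10240}]
# with a default VG of "xenvg" is normalized to
#   [{IDISK_SIZE: 10240, IDISK_MODE: constants.DISK_RDWR, IDISK_VG: "xenvg"}]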


def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  default_beparams = cluster.beparams[constants.PP_DEFAULT]
  for param, value in op.beparams.iteritems():
    if value == constants.VALUE_AUTO:
      op.beparams[param] = default_beparams[param]
  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(op.beparams)
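
# Illustrative example: with op.beparams = {constants.BE_VCPUS:
# constants.VALUE_AUTO}, the "auto" value is first replaced by the cluster
# default, and the dict is then layered over the cluster-wide defaults via
# SimpleFillBE, yielding a complete beparams dict.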


def _CheckOpportunisticLocking(op):
  """Generate error if opportunistic locking is not possible.

  """
  if op.opportunistic_locking and not op.iallocator:
    raise errors.OpPrereqError("Opportunistic locking is only available in"
                               " combination with an instance allocator",
                               errors.ECODE_INVAL)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      if self.op.disk_template != constants.DT_EXT:
        utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)

    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    _CheckOpportunisticLocking(self.op)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)

    self.needed_locks[locking.LEVEL_NODE_RES] = \
      _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    if self.op.opportunistic_locking:
      # Only consider nodes for which a lock is held
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
    else:
      node_whitelist = None

    #TODO Export network to iallocator so that it chooses a pnode
    # in a nodegroup that has the desired network connected to
    req = _CreateInstanceAllocRequest(self.op, self.disks,
                                      self.nics, self.be_full,
                                      node_whitelist)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      # When opportunistic locks are used only a temporary failure is generated
      if self.op.opportunistic_locking:
        ecode = errors.ECODE_TEMP_NORES
      else:
        ecode = errors.ECODE_NORES

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 ecode)

    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

    assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"

    if req.RequiredNodes() == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return (nl, nl)

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
                                   errors.ECODE_STATE)
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    else:
      self._old_instance_name = None

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" %
                                 (self.op.hypervisor, ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
                          "instance", "cluster")

    # fill and remember the beparams dict
    self.be_full = _ComputeFullBeParams(self.op, cluster)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
                             self.proc.GetECId())

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = _ComputeDisks(self.op, default_vg)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      if self.op.instance_name == self._old_instance_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
    _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
    _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Node locks differ from node resource locks"

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # Fill in any IPs from IP pools. This must happen here, because we need to
    # know the nic's primary node, as specified by the iallocator
    for idx, nic in enumerate(self.nics):
      net_uuid = nic.network
      if net_uuid is not None:
        nobj = self.cfg.GetNetwork(net_uuid)
        netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
        if netparams is None:
          raise errors.OpPrereqError("No netparams found for network"
                                     " %s. Probably not connected to"
                                     " node's %s nodegroup" %
                                     (nobj.name, self.pnode.name),
                                     errors.ECODE_INVAL)
        self.LogInfo("NIC/%d inherits netparams %s" %
                     (idx, netparams.values()))
        nic.nicparams = dict(netparams)
        if nic.ip is not None:
          if nic.ip.lower() == constants.NIC_IP_POOL:
            try:
              nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
                                         " from the address pool" % idx,
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
          else:
            try:
              self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP address %s already in use"
                                         " or does not belong to network %s" %
                                         (nic.ip, nobj.name),
                                         errors.ECODE_NOTUNIQUE)

      # net is None, ip None or given
      elif self.op.conflicts_check:
        _CheckForConflictingIp(self, nic.ip, self.pnode.name)

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      nodes = [pnode]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(snode)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        raise errors.OpPrereqError("Disk template %s not supported with"
                                   " exclusive storage" % self.op.disk_template,
                                   errors.ECODE_STATE)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      elif self.op.disk_template == constants.DT_EXT:
        # FIXME: Function that checks prereqs if needed
        pass
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (utils.CommaJoin(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
                                               self.op.disk_template)
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    #TODO: _CheckExtParams (remotely)
    # Check parameters for extstorage

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
10960 def Exec(self, feedback_fn):
10961 """Create and add the instance to the cluster.
10964 instance = self.op.instance_name
10965 pnode_name = self.pnode.name
10967 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10968 self.owned_locks(locking.LEVEL_NODE)), \
10969 "Node locks differ from node resource locks"
10970 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10972 ht_kind = self.op.hypervisor
10973 if ht_kind in constants.HTS_REQ_PORT:
10974 network_port = self.cfg.AllocatePort()
10975 else:
10976 network_port = None
10978 # This is ugly, but we have a chicken-and-egg problem here
10979 # We can only take the group disk parameters, as the instance
10980 # has no disks yet (we are generating them right here).
10981 node = self.cfg.GetNodeInfo(pnode_name)
10982 nodegroup = self.cfg.GetNodeGroup(node.group)
10983 disks = _GenerateDiskTemplate(self,
10984 self.op.disk_template,
10985 instance, pnode_name,
10986 self.secondaries,
10987 self.disks,
10988 self.instance_file_storage_dir,
10989 self.op.file_driver,
10990 0,
10991 feedback_fn,
10992 self.cfg.GetGroupDiskParams(nodegroup))
10994 iobj = objects.Instance(name=instance, os=self.op.os_type,
10995 primary_node=pnode_name,
10996 nics=self.nics, disks=disks,
10997 disk_template=self.op.disk_template,
10998 admin_state=constants.ADMINST_DOWN,
10999 network_port=network_port,
11000 beparams=self.op.beparams,
11001 hvparams=self.op.hvparams,
11002 hypervisor=self.op.hypervisor,
11003 osparams=self.op.osparams,
11004 )
11006 if self.op.tags:
11007 for tag in self.op.tags:
11008 iobj.AddTag(tag)
11010 if self.adopt_disks:
11011 if self.op.disk_template == constants.DT_PLAIN:
11012 # rename LVs to the newly-generated names; we need to construct
11013 # 'fake' LV disks with the old data, plus the new unique_id
11014 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
11015 rename_to = []
11016 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
11017 rename_to.append(t_dsk.logical_id)
11018 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
11019 self.cfg.SetDiskID(t_dsk, pnode_name)
11020 result = self.rpc.call_blockdev_rename(pnode_name,
11021 zip(tmp_disks, rename_to))
11022 result.Raise("Failed to rename adopted LVs")
11024 feedback_fn("* creating instance disks...")
11025 try:
11026 _CreateDisks(self, iobj)
11027 except errors.OpExecError:
11028 self.LogWarning("Device creation failed, reverting...")
11029 try:
11030 _RemoveDisks(self, iobj)
11031 finally:
11032 self.cfg.ReleaseDRBDMinors(instance)
11033 raise
11035 feedback_fn("adding instance %s to cluster config" % instance)
11037 self.cfg.AddInstance(iobj, self.proc.GetECId())
11039 # Declare that we don't want to remove the instance lock anymore, as we've
11040 # added the instance to the config
11041 del self.remove_locks[locking.LEVEL_INSTANCE]
11043 if self.op.mode == constants.INSTANCE_IMPORT:
11044 # Release unused nodes
11045 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
11046 else:
11047 # Release all nodes
11048 _ReleaseLocks(self, locking.LEVEL_NODE)
11050 disk_abort = False
11051 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11052 feedback_fn("* wiping instance disks...")
11053 try:
11054 _WipeDisks(self, iobj)
11055 except errors.OpExecError, err:
11056 logging.exception("Wiping disks failed")
11057 self.LogWarning("Wiping instance disks failed (%s)", err)
11058 disk_abort = True
11060 if disk_abort:
11061 # Something is already wrong with the disks, don't do anything else
11062 pass
11063 elif self.op.wait_for_sync:
11064 disk_abort = not _WaitForSync(self, iobj)
11065 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11066 # make sure the disks are not degraded (still sync-ing is ok)
11067 feedback_fn("* checking mirrors status")
11068 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11069 else:
11070 disk_abort = False
11072 if disk_abort:
11073 _RemoveDisks(self, iobj)
11074 self.cfg.RemoveInstance(iobj.name)
11075 # Make sure the instance lock gets removed
11076 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11077 raise errors.OpExecError("There are some degraded disks for"
11078 " this instance")
11080 # Release all node resource locks
11081 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11083 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11084 # we need to set the disks ID to the primary node, since the
11085 # preceding code might or might not have done it, depending on
11086 # disk template and other options
11087 for disk in iobj.disks:
11088 self.cfg.SetDiskID(disk, pnode_name)
11089 if self.op.mode == constants.INSTANCE_CREATE:
11090 if not self.op.no_install:
11091 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11092 not self.op.wait_for_sync)
11093 if pause_sync:
11094 feedback_fn("* pausing disk sync to install instance OS")
11095 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11096 (iobj.disks, iobj), True)
11098 for idx, success in enumerate(result.payload):
11099 if not success:
11100 logging.warn("pause-sync of instance %s for disk %d failed",
11101 instance, idx)
11103 feedback_fn("* running the instance OS create scripts...")
11104 # FIXME: pass debug option from opcode to backend
11105 os_add_result = \
11106 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11107 self.op.debug_level)
11109 feedback_fn("* resuming disk sync")
11110 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11111 (iobj.disks, iobj), False)
11113 for idx, success in enumerate(result.payload):
11114 if not success:
11115 logging.warn("resume-sync of instance %s for disk %d failed",
11116 instance, idx)
11118 os_add_result.Raise("Could not add os for instance %s"
11119 " on node %s" % (instance, pnode_name))
11122 if self.op.mode == constants.INSTANCE_IMPORT:
11123 feedback_fn("* running the instance OS import scripts...")
11125 transfers = []
11127 for idx, image in enumerate(self.src_images):
11128 if not image:
11129 continue
11131 # FIXME: pass debug option from opcode to backend
11132 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11133 constants.IEIO_FILE, (image, ),
11134 constants.IEIO_SCRIPT,
11135 (iobj.disks[idx], idx),
11136 None)
11137 transfers.append(dt)
11139 import_result = \
11140 masterd.instance.TransferInstanceData(self, feedback_fn,
11141 self.op.src_node, pnode_name,
11142 self.pnode.secondary_ip,
11143 iobj, transfers)
11144 if not compat.all(import_result):
11145 self.LogWarning("Some disks for instance %s on node %s were not"
11146 " imported successfully" % (instance, pnode_name))
11148 rename_from = self._old_instance_name
11150 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11151 feedback_fn("* preparing remote import...")
11152 # The source cluster will stop the instance before attempting to make
11153 # a connection. In some cases stopping an instance can take a long
11154 # time, hence the shutdown timeout is added to the connection
11155 # timeout.
11156 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11157 self.op.source_shutdown_timeout)
11158 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11160 assert iobj.primary_node == self.pnode.name
11161 disk_results = \
11162 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11163 self.source_x509_ca,
11164 self._cds, timeouts)
11165 if not compat.all(disk_results):
11166 # TODO: Should the instance still be started, even if some disks
11167 # failed to import (valid for local imports, too)?
11168 self.LogWarning("Some disks for instance %s on node %s were not"
11169 " imported successfully" % (instance, pnode_name))
11171 rename_from = self.source_instance_name
11173 else:
11174 # also checked in the prereq part
11175 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11176 % self.op.mode)
11178 # Run rename script on newly imported instance
11179 assert iobj.name == instance
11180 feedback_fn("Running rename script for %s" % instance)
11181 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11182 rename_from,
11183 self.op.debug_level)
11184 if result.fail_msg:
11185 self.LogWarning("Failed to run rename script for %s on node"
11186 " %s: %s" % (instance, pnode_name, result.fail_msg))
11188 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11190 if self.op.start:
11191 iobj.admin_state = constants.ADMINST_UP
11192 self.cfg.Update(iobj, feedback_fn)
11193 logging.info("Starting instance %s on node %s", instance, pnode_name)
11194 feedback_fn("* starting instance...")
11195 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11196 False)
11197 result.Raise("Could not start instance")
11199 return list(iobj.all_nodes)
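# Illustrative usage sketch (not part of the original module; instance and
# volume names are made up): the disk-adoption path validated in CheckPrereq
# and executed above would typically be driven by an opcode such as
#   opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                            disk_template=constants.DT_PLAIN,
#                            disks=[{constants.IDISK_ADOPT: "existing-vol"}],
#                            ...)
# i.e. the equivalent of "gnt-instance add -t plain --disk 0:adopt=...".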
11202 class LUInstanceMultiAlloc(NoHooksLU):
11203 """Allocates multiple instances at the same time.
11208 def CheckArguments(self):
11209 """Check arguments.
11212 nodes = []
11213 for inst in self.op.instances:
11214 if inst.iallocator is not None:
11215 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11216 " instance objects", errors.ECODE_INVAL)
11217 nodes.append(bool(inst.pnode))
11218 if inst.disk_template in constants.DTS_INT_MIRROR:
11219 nodes.append(bool(inst.snode))
11221 has_nodes = compat.any(nodes)
11222 if compat.all(nodes) ^ has_nodes:
11223 raise errors.OpPrereqError("There are instance objects providing"
11224 " pnode/snode while others do not",
11225 errors.ECODE_INVAL)
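# Note on the check above: compat.all(nodes) ^ has_nodes is true exactly
# when some, but not all, entries of 'nodes' are true, i.e. when only a
# subset of the instance objects specify nodes; "none given" and "all
# given" both pass.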
11227 if self.op.iallocator is None:
11228 default_iallocator = self.cfg.GetDefaultIAllocator()
11229 if default_iallocator and has_nodes:
11230 self.op.iallocator = default_iallocator
11232 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11233 " given and no cluster-wide default"
11234 " iallocator found; please specify either"
11235 " an iallocator or nodes on the instances"
11236 " or set a cluster-wide default iallocator",
11237 errors.ECODE_INVAL)
11239 _CheckOpportunisticLocking(self.op)
11241 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11242 if dups:
11243 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11244 utils.CommaJoin(dups), errors.ECODE_INVAL)
11246 def ExpandNames(self):
11247 """Calculate the locks.
11250 self.share_locks = _ShareAll()
11251 self.needed_locks = {
11252 # iallocator will select nodes and even if no iallocator is used,
11253 # collisions with LUInstanceCreate should be avoided
11254 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11255 }
11257 if self.op.iallocator:
11258 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11259 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11261 if self.op.opportunistic_locking:
11262 self.opportunistic_locks[locking.LEVEL_NODE] = True
11263 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11264 else:
11265 nodeslist = []
11266 for inst in self.op.instances:
11267 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11268 nodeslist.append(inst.pnode)
11269 if inst.snode is not None:
11270 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11271 nodeslist.append(inst.snode)
11273 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11274 # Lock resources of instance's primary and secondary nodes (copy to
11275 # prevent accidental modification)
11276 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11278 def CheckPrereq(self):
11279 """Check prerequisite.
11282 cluster = self.cfg.GetClusterInfo()
11283 default_vg = self.cfg.GetVGName()
11284 ec_id = self.proc.GetECId()
11286 if self.op.opportunistic_locking:
11287 # Only consider nodes for which a lock is held
11288 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11289 else:
11290 node_whitelist = None
11292 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11293 _ComputeNics(op, cluster, None,
11294 self.cfg, ec_id),
11295 _ComputeFullBeParams(op, cluster),
11296 node_whitelist)
11297 for op in self.op.instances]
11299 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11300 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11302 ial.Run(self.op.iallocator)
11304 if not ial.success:
11305 raise errors.OpPrereqError("Can't compute nodes using"
11306 " iallocator '%s': %s" %
11307 (self.op.iallocator, ial.info),
11308 errors.ECODE_NORES)
11310 self.ia_result = ial.result
11312 if self.op.dry_run:
11313 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11314 constants.JOB_IDS_KEY: [],
11315 })
11317 def _ConstructPartialResult(self):
11318 """Contructs the partial result.
11321 (allocatable, failed) = self.ia_result
11322 return {
11323 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11324 map(compat.fst, allocatable),
11325 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11326 }
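# Illustrative shape of the partial result (instance names made up): a dict
# keyed by the ALLOCATABLE_KEY/FAILED_KEY constants used above, mapping the
# former to e.g. ["inst1", "inst2"] (the first elements of the
# 'allocatable' pairs) and the latter to e.g. ["inst3"].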
11328 def Exec(self, feedback_fn):
11329 """Executes the opcode.
11332 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11333 (allocatable, failed) = self.ia_result
11334 jobs = []
11336 for (name, nodes) in allocatable:
11337 op = op2inst.pop(name)
11339 if len(nodes) > 1:
11340 (op.pnode, op.snode) = nodes
11341 else:
11342 (op.pnode,) = nodes
11344 jobs.append([op])
11346 missing = set(op2inst.keys()) - set(failed)
11347 assert not missing, \
11348 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11350 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11353 def _CheckRADOSFreeSpace():
11354 """Compute disk size requirements inside the RADOS cluster.
11357 # For the RADOS cluster we assume there is always enough space.
11358 pass
11361 class LUInstanceConsole(NoHooksLU):
11362 """Connect to an instance's console.
11364 This is somewhat special in that it returns the command line that
11365 you need to run on the master node in order to connect to the
11366 console.
11371 def ExpandNames(self):
11372 self.share_locks = _ShareAll()
11373 self._ExpandAndLockInstance()
11375 def CheckPrereq(self):
11376 """Check prerequisites.
11378 This checks that the instance is in the cluster.
11381 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11382 assert self.instance is not None, \
11383 "Cannot retrieve locked instance %s" % self.op.instance_name
11384 _CheckNodeOnline(self, self.instance.primary_node)
11386 def Exec(self, feedback_fn):
11387 """Connect to the console of an instance
11390 instance = self.instance
11391 node = instance.primary_node
11393 node_insts = self.rpc.call_instance_list([node],
11394 [instance.hypervisor])[node]
11395 node_insts.Raise("Can't get node information from %s" % node)
11397 if instance.name not in node_insts.payload:
11398 if instance.admin_state == constants.ADMINST_UP:
11399 state = constants.INSTST_ERRORDOWN
11400 elif instance.admin_state == constants.ADMINST_DOWN:
11401 state = constants.INSTST_ADMINDOWN
11403 state = constants.INSTST_ADMINOFFLINE
11404 raise errors.OpExecError("Instance %s is not running (state %s)" %
11405 (instance.name, state))
11407 logging.debug("Connecting to console of %s on %s", instance.name, node)
11409 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11412 def _GetInstanceConsole(cluster, instance):
11413 """Returns console information for an instance.
11415 @type cluster: L{objects.Cluster}
11416 @type instance: L{objects.Instance}
11420 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11421 # beparams and hvparams are passed separately, to avoid editing the
11422 # instance and then saving the defaults in the instance itself.
11423 hvparams = cluster.FillHV(instance)
11424 beparams = cluster.FillBE(instance)
11425 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11427 assert console.instance == instance.name
11428 assert console.Validate()
11430 return console.ToDict()
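# Illustrative sketch of the returned dictionary (field names follow
# objects.InstanceConsole as far as known; treat them as assumptions):
#   {"instance": "inst1", "kind": "ssh", "host": "node1.example.com",
#    "user": "root", "command": ["ssh", ...]}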
11433 class LUInstanceReplaceDisks(LogicalUnit):
11434 """Replace the disks of an instance.
11437 HPATH = "mirrors-replace"
11438 HTYPE = constants.HTYPE_INSTANCE
11440 REQ_BGL = False
11441 def CheckArguments(self):
11442 """Check arguments.
11445 remote_node = self.op.remote_node
11446 ialloc = self.op.iallocator
11447 if self.op.mode == constants.REPLACE_DISK_CHG:
11448 if remote_node is None and ialloc is None:
11449 raise errors.OpPrereqError("When changing the secondary either an"
11450 " iallocator script must be used or the"
11451 " new node given", errors.ECODE_INVAL)
11453 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11455 elif remote_node is not None or ialloc is not None:
11456 # Not replacing the secondary
11457 raise errors.OpPrereqError("The iallocator and new node options can"
11458 " only be used when changing the"
11459 " secondary node", errors.ECODE_INVAL)
11461 def ExpandNames(self):
11462 self._ExpandAndLockInstance()
11464 assert locking.LEVEL_NODE not in self.needed_locks
11465 assert locking.LEVEL_NODE_RES not in self.needed_locks
11466 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11468 assert self.op.iallocator is None or self.op.remote_node is None, \
11469 "Conflicting options"
11471 if self.op.remote_node is not None:
11472 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11474 # Warning: do not remove the locking of the new secondary here
11475 # unless DRBD8.AddChildren is changed to work in parallel;
11476 # currently it doesn't since parallel invocations of
11477 # FindUnusedMinor will conflict
11478 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11479 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11481 self.needed_locks[locking.LEVEL_NODE] = []
11482 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11484 if self.op.iallocator is not None:
11485 # iallocator will select a new node in the same group
11486 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11487 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11489 self.needed_locks[locking.LEVEL_NODE_RES] = []
11491 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11492 self.op.iallocator, self.op.remote_node,
11493 self.op.disks, self.op.early_release,
11494 self.op.ignore_ipolicy)
11496 self.tasklets = [self.replacer]
11498 def DeclareLocks(self, level):
11499 if level == locking.LEVEL_NODEGROUP:
11500 assert self.op.remote_node is None
11501 assert self.op.iallocator is not None
11502 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11504 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11505 # Lock all groups used by instance optimistically; this requires going
11506 # via the node before it's locked, requiring verification later on
11507 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11508 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11510 elif level == locking.LEVEL_NODE:
11511 if self.op.iallocator is not None:
11512 assert self.op.remote_node is None
11513 assert not self.needed_locks[locking.LEVEL_NODE]
11514 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11516 # Lock member nodes of all locked groups
11517 self.needed_locks[locking.LEVEL_NODE] = \
11518 [node_name
11519 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11520 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11521 else:
11522 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11524 self._LockInstancesNodes()
11526 elif level == locking.LEVEL_NODE_RES:
11528 self.needed_locks[locking.LEVEL_NODE_RES] = \
11529 self.needed_locks[locking.LEVEL_NODE]
11531 def BuildHooksEnv(self):
11532 """Build hooks env.
11534 This runs on the master, the primary and all the secondaries.
11537 instance = self.replacer.instance
11539 "MODE": self.op.mode,
11540 "NEW_SECONDARY": self.op.remote_node,
11541 "OLD_SECONDARY": instance.secondary_nodes[0],
11542 }
11543 env.update(_BuildInstanceHookEnvByObject(self, instance))
11544 return env
11546 def BuildHooksNodes(self):
11547 """Build hooks nodes.
11550 instance = self.replacer.instance
11551 nl = [
11552 self.cfg.GetMasterNode(),
11553 instance.primary_node,
11554 ]
11555 if self.op.remote_node is not None:
11556 nl.append(self.op.remote_node)
11558 return nl, nl
11559 def CheckPrereq(self):
11560 """Check prerequisites.
11563 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11564 self.op.iallocator is None)
11566 # Verify if node group locks are still correct
11567 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11568 if owned_groups:
11569 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11571 return LogicalUnit.CheckPrereq(self)
11574 class TLReplaceDisks(Tasklet):
11575 """Replaces disks for an instance.
11577 Note: Locking is not within the scope of this class.
11580 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11581 disks, early_release, ignore_ipolicy):
11582 """Initializes this class.
11585 Tasklet.__init__(self, lu)
11588 self.instance_name = instance_name
11590 self.iallocator_name = iallocator_name
11591 self.remote_node = remote_node
11593 self.early_release = early_release
11594 self.ignore_ipolicy = ignore_ipolicy
11597 self.instance = None
11598 self.new_node = None
11599 self.target_node = None
11600 self.other_node = None
11601 self.remote_node_info = None
11602 self.node_secondary_ip = None
11604 @staticmethod
11605 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11606 """Compute a new secondary node using an IAllocator.
11609 req = iallocator.IAReqRelocate(name=instance_name,
11610 relocate_from=list(relocate_from))
11611 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11613 ial.Run(iallocator_name)
11615 if not ial.success:
11616 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11617 " %s" % (iallocator_name, ial.info),
11618 errors.ECODE_NORES)
11620 remote_node_name = ial.result[0]
11622 lu.LogInfo("Selected new secondary for instance '%s': %s",
11623 instance_name, remote_node_name)
11625 return remote_node_name
11627 def _FindFaultyDisks(self, node_name):
11628 """Wrapper for L{_FindFaultyInstanceDisks}.
11631 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11632 node_name, True)
11634 def _CheckDisksActivated(self, instance):
11635 """Checks if the instance disks are activated.
11637 @param instance: The instance whose disks should be checked
11638 @return: True if they are activated, False otherwise
11641 nodes = instance.all_nodes
11643 for idx, dev in enumerate(instance.disks):
11645 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11646 self.cfg.SetDiskID(dev, node)
11648 result = _BlockdevFind(self, node, dev, instance)
11650 if result.offline:
11651 continue
11652 elif result.fail_msg or not result.payload:
11653 return False
11655 return True
11657 def CheckPrereq(self):
11658 """Check prerequisites.
11660 This checks that the instance is in the cluster.
11663 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11664 assert instance is not None, \
11665 "Cannot retrieve locked instance %s" % self.instance_name
11667 if instance.disk_template != constants.DT_DRBD8:
11668 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11669 " instances", errors.ECODE_INVAL)
11671 if len(instance.secondary_nodes) != 1:
11672 raise errors.OpPrereqError("The instance has a strange layout,"
11673 " expected one secondary but found %d" %
11674 len(instance.secondary_nodes),
11675 errors.ECODE_FAULT)
11677 instance = self.instance
11678 secondary_node = instance.secondary_nodes[0]
11680 if self.iallocator_name is None:
11681 remote_node = self.remote_node
11682 else:
11683 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11684 instance.name, instance.secondary_nodes)
11686 if remote_node is None:
11687 self.remote_node_info = None
11688 else:
11689 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11690 "Remote node '%s' is not locked" % remote_node
11692 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11693 assert self.remote_node_info is not None, \
11694 "Cannot retrieve locked node %s" % remote_node
11696 if remote_node == self.instance.primary_node:
11697 raise errors.OpPrereqError("The specified node is the primary node of"
11698 " the instance", errors.ECODE_INVAL)
11700 if remote_node == secondary_node:
11701 raise errors.OpPrereqError("The specified node is already the"
11702 " secondary node of the instance",
11703 errors.ECODE_INVAL)
11705 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11706 constants.REPLACE_DISK_CHG):
11707 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11708 errors.ECODE_INVAL)
11710 if self.mode == constants.REPLACE_DISK_AUTO:
11711 if not self._CheckDisksActivated(instance):
11712 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11713 " first" % self.instance_name,
11714 errors.ECODE_STATE)
11715 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11716 faulty_secondary = self._FindFaultyDisks(secondary_node)
11718 if faulty_primary and faulty_secondary:
11719 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11720 " one node and can not be repaired"
11721 " automatically" % self.instance_name,
11722 errors.ECODE_STATE)
11724 if faulty_primary:
11725 self.disks = faulty_primary
11726 self.target_node = instance.primary_node
11727 self.other_node = secondary_node
11728 check_nodes = [self.target_node, self.other_node]
11729 elif faulty_secondary:
11730 self.disks = faulty_secondary
11731 self.target_node = secondary_node
11732 self.other_node = instance.primary_node
11733 check_nodes = [self.target_node, self.other_node]
11734 else:
11735 self.disks = []
11736 check_nodes = []
11738 else:
11739 # Non-automatic modes
11740 if self.mode == constants.REPLACE_DISK_PRI:
11741 self.target_node = instance.primary_node
11742 self.other_node = secondary_node
11743 check_nodes = [self.target_node, self.other_node]
11745 elif self.mode == constants.REPLACE_DISK_SEC:
11746 self.target_node = secondary_node
11747 self.other_node = instance.primary_node
11748 check_nodes = [self.target_node, self.other_node]
11750 elif self.mode == constants.REPLACE_DISK_CHG:
11751 self.new_node = remote_node
11752 self.other_node = instance.primary_node
11753 self.target_node = secondary_node
11754 check_nodes = [self.new_node, self.other_node]
11756 _CheckNodeNotDrained(self.lu, remote_node)
11757 _CheckNodeVmCapable(self.lu, remote_node)
11759 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11760 assert old_node_info is not None
11761 if old_node_info.offline and not self.early_release:
11762 # doesn't make sense to delay the release
11763 self.early_release = True
11764 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11765 " early-release mode", secondary_node)
11767 else:
11768 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11769 self.mode)
11771 # If not specified, all disks should be replaced
11772 if not self.disks:
11773 self.disks = range(len(self.instance.disks))
11775 # TODO: This is ugly, but right now we can't distinguish between internal
11776 # submitted opcode and external one. We should fix that.
11777 if self.remote_node_info:
11778 # We change the node, lets verify it still meets instance policy
11779 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11780 cluster = self.cfg.GetClusterInfo()
11781 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11782 new_group_info)
11783 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11784 self.cfg, ignore=self.ignore_ipolicy)
11786 for node in check_nodes:
11787 _CheckNodeOnline(self.lu, node)
11789 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11790 self.other_node,
11791 self.target_node]
11792 if node_name is not None)
11794 # Release unneeded node and node resource locks
11795 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11796 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11797 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11799 # Release any owned node group
11800 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11802 # Check whether disks are valid
11803 for disk_idx in self.disks:
11804 instance.FindDisk(disk_idx)
11806 # Get secondary node IP addresses
11807 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11808 in self.cfg.GetMultiNodeInfo(touched_nodes))
11810 def Exec(self, feedback_fn):
11811 """Execute disk replacement.
11813 This dispatches the disk replacement to the appropriate handler.
11816 if __debug__:
11817 # Verify owned locks before starting operation
11818 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11819 assert set(owned_nodes) == set(self.node_secondary_ip), \
11820 ("Incorrect node locks, owning %s, expected %s" %
11821 (owned_nodes, self.node_secondary_ip.keys()))
11822 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11823 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11824 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11826 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11827 assert list(owned_instances) == [self.instance_name], \
11828 "Instance '%s' not locked" % self.instance_name
11830 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11831 "Should not own any node group lock at this point"
11834 feedback_fn("No disks need replacement for instance '%s'" %
11835 self.instance.name)
11838 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11839 (utils.CommaJoin(self.disks), self.instance.name))
11840 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11841 feedback_fn("Current seconary node: %s" %
11842 utils.CommaJoin(self.instance.secondary_nodes))
11844 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11846 # Activate the instance disks if we're replacing them on a down instance
11847 if activate_disks:
11848 _StartInstanceDisks(self.lu, self.instance, True)
11850 try:
11851 # Should we replace the secondary node?
11852 if self.new_node is not None:
11853 fn = self._ExecDrbd8Secondary
11854 else:
11855 fn = self._ExecDrbd8DiskOnly
11857 result = fn(feedback_fn)
11858 finally:
11859 # Deactivate the instance disks if we're replacing them on a
11860 # down instance
11861 if activate_disks:
11862 _SafeShutdownInstanceDisks(self.lu, self.instance)
11864 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11866 if __debug__:
11867 # Verify owned locks
11868 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11869 nodes = frozenset(self.node_secondary_ip)
11870 assert ((self.early_release and not owned_nodes) or
11871 (not self.early_release and not (set(owned_nodes) - nodes))), \
11872 ("Not owning the correct locks, early_release=%s, owned=%r,"
11873 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11877 def _CheckVolumeGroup(self, nodes):
11878 self.lu.LogInfo("Checking volume groups")
11880 vgname = self.cfg.GetVGName()
11882 # Make sure volume group exists on all involved nodes
11883 results = self.rpc.call_vg_list(nodes)
11885 raise errors.OpExecError("Can't list volume groups on the nodes")
11887 for node in nodes:
11888 res = results[node]
11889 res.Raise("Error checking node %s" % node)
11890 if vgname not in res.payload:
11891 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11894 def _CheckDisksExistence(self, nodes):
11895 # Check disk existence
11896 for idx, dev in enumerate(self.instance.disks):
11897 if idx not in self.disks:
11901 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11902 self.cfg.SetDiskID(dev, node)
11904 result = _BlockdevFind(self, node, dev, self.instance)
11906 msg = result.fail_msg
11907 if msg or not result.payload:
11909 msg = "disk not found"
11910 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11913 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11914 for idx, dev in enumerate(self.instance.disks):
11915 if idx not in self.disks:
11918 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11921 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11922 on_primary, ldisk=ldisk):
11923 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11924 " replace disks for instance %s" %
11925 (node_name, self.instance.name))
11927 def _CreateNewStorage(self, node_name):
11928 """Create new storage on the primary or secondary node.
11930 This is only used for same-node replaces, not for changing the
11931 secondary node, hence we don't want to modify the existing disk.
11935 iv_names = {}
11936 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11937 for idx, dev in enumerate(disks):
11938 if idx not in self.disks:
11941 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11943 self.cfg.SetDiskID(dev, node_name)
11945 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11946 names = _GenerateUniqueNames(self.lu, lv_names)
11948 (data_disk, meta_disk) = dev.children
11949 vg_data = data_disk.logical_id[0]
11950 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11951 logical_id=(vg_data, names[0]),
11952 params=data_disk.params)
11953 vg_meta = meta_disk.logical_id[0]
11954 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11955 size=constants.DRBD_META_SIZE,
11956 logical_id=(vg_meta, names[1]),
11957 params=meta_disk.params)
11959 new_lvs = [lv_data, lv_meta]
11960 old_lvs = [child.Copy() for child in dev.children]
11961 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11962 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11964 # we pass force_create=True to force the LVM creation
11965 for new_lv in new_lvs:
11966 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11967 _GetInstanceInfoText(self.instance), False,
11968 excl_stor)
11970 return iv_names
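# Illustrative shape of the returned mapping (device names made up):
#   iv_names == {"disk/0": (drbd_dev, [old_data_lv, old_meta_lv],
#                           [new_data_lv, new_meta_lv])}
# _CheckDevices and _RemoveOldStorage below iterate over exactly this
# structure.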
11972 def _CheckDevices(self, node_name, iv_names):
11973 for name, (dev, _, _) in iv_names.iteritems():
11974 self.cfg.SetDiskID(dev, node_name)
11976 result = _BlockdevFind(self, node_name, dev, self.instance)
11978 msg = result.fail_msg
11979 if msg or not result.payload:
11981 msg = "disk not found"
11982 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11985 if result.payload.is_degraded:
11986 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11988 def _RemoveOldStorage(self, node_name, iv_names):
11989 for name, (_, old_lvs, _) in iv_names.iteritems():
11990 self.lu.LogInfo("Remove logical volumes for %s", name)
11992 for lv in old_lvs:
11993 self.cfg.SetDiskID(lv, node_name)
11995 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11997 self.lu.LogWarning("Can't remove old LV: %s", msg,
11998 hint="remove unused LVs manually")
12000 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
12001 """Replace a disk on the primary or secondary for DRBD 8.
12003 The algorithm for replace is quite complicated:
12005 1. for each disk to be replaced:
12007 1. create new LVs on the target node with unique names
12008 1. detach old LVs from the drbd device
12009 1. rename old LVs to name_replaced.<time_t>
12010 1. rename new LVs to old LVs
12011 1. attach the new LVs (with the old names now) to the drbd device
12013 1. wait for sync across all devices
12015 1. for each modified disk:
12017 1. remove old LVs (which have the name name_replaced.<time_t>)
12019 Failures are not very well handled.
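Illustrative trace for one disk (LV names made up): the old data LV
"disk0-data" is renamed to "disk0-data_replaced-<time_t>", the freshly
created ".disk0_data" LV is then renamed to "disk0-data", and the pair is
re-attached, so the DRBD device keeps children under the original names.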
12023 steps_total = 6
12024 # Step: check device activation
12025 self.lu.LogStep(1, steps_total, "Check device existence")
12026 self._CheckDisksExistence([self.other_node, self.target_node])
12027 self._CheckVolumeGroup([self.target_node, self.other_node])
12029 # Step: check other node consistency
12030 self.lu.LogStep(2, steps_total, "Check peer consistency")
12031 self._CheckDisksConsistency(self.other_node,
12032 self.other_node == self.instance.primary_node,
12033 False)
12035 # Step: create new storage
12036 self.lu.LogStep(3, steps_total, "Allocate new storage")
12037 iv_names = self._CreateNewStorage(self.target_node)
12039 # Step: for each lv, detach+rename*2+attach
12040 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12041 for dev, old_lvs, new_lvs in iv_names.itervalues():
12042 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
12044 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
12045 old_lvs)
12046 result.Raise("Can't detach drbd from local storage on node"
12047 " %s for device %s" % (self.target_node, dev.iv_name))
12049 #cfg.Update(instance)
12051 # ok, we created the new LVs, so now we know we have the needed
12052 # storage; as such, we proceed on the target node to rename
12053 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12054 # using the assumption that logical_id == physical_id (which in
12055 # turn is the unique_id on that node)
12057 # FIXME(iustin): use a better name for the replaced LVs
12058 temp_suffix = int(time.time())
12059 ren_fn = lambda d, suff: (d.physical_id[0],
12060 d.physical_id[1] + "_replaced-%s" % suff)
12062 # Build the rename list based on what LVs exist on the node
12063 rename_old_to_new = []
12064 for to_ren in old_lvs:
12065 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12066 if not result.fail_msg and result.payload:
12068 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12070 self.lu.LogInfo("Renaming the old LVs on the target node")
12071 result = self.rpc.call_blockdev_rename(self.target_node,
12072 rename_old_to_new)
12073 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12075 # Now we rename the new LVs to the old LVs
12076 self.lu.LogInfo("Renaming the new LVs on the target node")
12077 rename_new_to_old = [(new, old.physical_id)
12078 for old, new in zip(old_lvs, new_lvs)]
12079 result = self.rpc.call_blockdev_rename(self.target_node,
12080 rename_new_to_old)
12081 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12083 # Intermediate steps of in memory modifications
12084 for old, new in zip(old_lvs, new_lvs):
12085 new.logical_id = old.logical_id
12086 self.cfg.SetDiskID(new, self.target_node)
12088 # We need to modify old_lvs so that removal later removes the
12089 # right LVs, not the newly added ones; note that old_lvs is a
12090 # copy here
12091 for disk in old_lvs:
12092 disk.logical_id = ren_fn(disk, temp_suffix)
12093 self.cfg.SetDiskID(disk, self.target_node)
12095 # Now that the new lvs have the old name, we can add them to the device
12096 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12097 result = self.rpc.call_blockdev_addchildren(self.target_node,
12098 (dev, self.instance), new_lvs)
12099 msg = result.fail_msg
12100 if msg:
12101 for new_lv in new_lvs:
12102 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12103 new_lv).fail_msg
12104 if msg2:
12105 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12106 hint=("cleanup manually the unused logical"
12108 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12110 cstep = itertools.count(5)
12112 if self.early_release:
12113 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12114 self._RemoveOldStorage(self.target_node, iv_names)
12115 # TODO: Check if releasing locks early still makes sense
12116 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12118 # Release all resource locks except those used by the instance
12119 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12120 keep=self.node_secondary_ip.keys())
12122 # Release all node locks while waiting for sync
12123 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12125 # TODO: Can the instance lock be downgraded here? Take the optional disk
12126 # shutdown in the caller into consideration.
12129 # This can fail as the old devices are degraded and _WaitForSync
12130 # does a combined result over all disks, so we don't check its return value
12131 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12132 _WaitForSync(self.lu, self.instance)
12134 # Check all devices manually
12135 self._CheckDevices(self.instance.primary_node, iv_names)
12137 # Step: remove old storage
12138 if not self.early_release:
12139 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12140 self._RemoveOldStorage(self.target_node, iv_names)
12142 def _ExecDrbd8Secondary(self, feedback_fn):
12143 """Replace the secondary node for DRBD 8.
12145 The algorithm for replace is quite complicated:
12146 - for all disks of the instance:
12147 - create new LVs on the new node with same names
12148 - shutdown the drbd device on the old secondary
12149 - disconnect the drbd network on the primary
12150 - create the drbd device on the new secondary
12151 - network attach the drbd on the primary, using an artifice:
12152 the drbd code for Attach() will connect to the network if it
12153 finds a device which is connected to the good local disks but
12154 not network enabled
12155 - wait for sync across all devices
12156 - remove all disks from the old secondary
12158 Failures are not very well handled.
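Illustrative connection-state sequence for one DRBD device: it is shut
down on the old secondary, the primary goes connected -> standalone via
drbd_disconnect_net, a device without network information is brought up on
the new secondary, and drbd_attach_net moves both ends back to connected,
after which they resync.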
12161 steps_total = 6
12163 pnode = self.instance.primary_node
12165 # Step: check device activation
12166 self.lu.LogStep(1, steps_total, "Check device existence")
12167 self._CheckDisksExistence([self.instance.primary_node])
12168 self._CheckVolumeGroup([self.instance.primary_node])
12170 # Step: check other node consistency
12171 self.lu.LogStep(2, steps_total, "Check peer consistency")
12172 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12174 # Step: create new storage
12175 self.lu.LogStep(3, steps_total, "Allocate new storage")
12176 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12177 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12178 for idx, dev in enumerate(disks):
12179 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12180 (self.new_node, idx))
12181 # we pass force_create=True to force LVM creation
12182 for new_lv in dev.children:
12183 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12184 True, _GetInstanceInfoText(self.instance), False,
12185 excl_stor)
12187 # Step 4: drbd minors and drbd setup changes
12188 # after this, we must manually remove the drbd minors on both the
12189 # error and the success paths
12190 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12191 minors = self.cfg.AllocateDRBDMinor([self.new_node
12192 for dev in self.instance.disks],
12193 self.instance.name)
12194 logging.debug("Allocated minors %r", minors)
12196 iv_names = {}
12197 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12198 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12199 (self.new_node, idx))
12200 # create new devices on new_node; note that we create two IDs:
12201 # one without port, so the drbd will be activated without
12202 # networking information on the new node at this stage, and one
12203 # with network, for the latter activation in step 4
12204 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12205 if self.instance.primary_node == o_node1:
12206 p_minor = o_minor1
12207 else:
12208 assert self.instance.primary_node == o_node2, "Three-node instance?"
12209 p_minor = o_minor2
12211 new_alone_id = (self.instance.primary_node, self.new_node, None,
12212 p_minor, new_minor, o_secret)
12213 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12214 p_minor, new_minor, o_secret)
12216 iv_names[idx] = (dev, dev.children, new_net_id)
12217 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12219 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12220 logical_id=new_alone_id,
12221 children=dev.children,
12222 size=dev.size,
12223 params={})
12224 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12225 self.cfg)
12226 try:
12227 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12228 anno_new_drbd,
12229 _GetInstanceInfoText(self.instance), False,
12230 excl_stor)
12231 except errors.GenericError:
12232 self.cfg.ReleaseDRBDMinors(self.instance.name)
12233 raise
12235 # We have new devices, shutdown the drbd on the old secondary
12236 for idx, dev in enumerate(self.instance.disks):
12237 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12238 self.cfg.SetDiskID(dev, self.target_node)
12239 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12240 (dev, self.instance)).fail_msg
12242 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12243 "node: %s" % (idx, msg),
12244 hint=("Please cleanup this device manually as"
12245 " soon as possible"))
12247 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12248 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12249 self.instance.disks)[pnode]
12251 msg = result.fail_msg
12252 if msg:
12253 # detaches didn't succeed (unlikely)
12254 self.cfg.ReleaseDRBDMinors(self.instance.name)
12255 raise errors.OpExecError("Can't detach the disks from the network on"
12256 " old node: %s" % (msg,))
12258 # if we managed to detach at least one, we update all the disks of
12259 # the instance to point to the new secondary
12260 self.lu.LogInfo("Updating instance configuration")
12261 for dev, _, new_logical_id in iv_names.itervalues():
12262 dev.logical_id = new_logical_id
12263 self.cfg.SetDiskID(dev, self.instance.primary_node)
12265 self.cfg.Update(self.instance, feedback_fn)
12267 # Release all node locks (the configuration has been updated)
12268 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12270 # and now perform the drbd attach
12271 self.lu.LogInfo("Attaching primary drbds to new secondary"
12272 " (standalone => connected)")
12273 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12274 self.new_node],
12275 self.node_secondary_ip,
12276 (self.instance.disks, self.instance),
12277 self.instance.name,
12278 False)
12279 for to_node, to_result in result.items():
12280 msg = to_result.fail_msg
12282 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12284 hint=("please do a gnt-instance info to see the"
12285 " status of disks"))
12287 cstep = itertools.count(5)
12289 if self.early_release:
12290 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12291 self._RemoveOldStorage(self.target_node, iv_names)
12292 # TODO: Check if releasing locks early still makes sense
12293 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12295 # Release all resource locks except those used by the instance
12296 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12297 keep=self.node_secondary_ip.keys())
12299 # TODO: Can the instance lock be downgraded here? Take the optional disk
12300 # shutdown in the caller into consideration.
12303 # This can fail as the old devices are degraded and _WaitForSync
12304 # does a combined result over all disks, so we don't check its return value
12305 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12306 _WaitForSync(self.lu, self.instance)
12308 # Check all devices manually
12309 self._CheckDevices(self.instance.primary_node, iv_names)
12311 # Step: remove old storage
12312 if not self.early_release:
12313 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12314 self._RemoveOldStorage(self.target_node, iv_names)
12317 class LURepairNodeStorage(NoHooksLU):
12318 """Repairs the volume group on a node.
12323 def CheckArguments(self):
12324 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12326 storage_type = self.op.storage_type
12328 if (constants.SO_FIX_CONSISTENCY not in
12329 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12330 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12331 " repaired" % storage_type,
12332 errors.ECODE_INVAL)
12334 def ExpandNames(self):
12335 self.needed_locks = {
12336 locking.LEVEL_NODE: [self.op.node_name],
12337 }
12339 def _CheckFaultyDisks(self, instance, node_name):
12340 """Ensure faulty disks abort the opcode or at least warn."""
12341 try:
12342 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12343 node_name, True):
12344 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12345 " node '%s'" % (instance.name, node_name),
12346 errors.ECODE_STATE)
12347 except errors.OpPrereqError, err:
12348 if self.op.ignore_consistency:
12349 self.LogWarning(str(err.args[0]))
12350 else:
12351 raise
12353 def CheckPrereq(self):
12354 """Check prerequisites.
12357 # Check whether any instance on this node has faulty disks
12358 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12359 if inst.admin_state != constants.ADMINST_UP:
12360 continue
12361 check_nodes = set(inst.all_nodes)
12362 check_nodes.discard(self.op.node_name)
12363 for inst_node_name in check_nodes:
12364 self._CheckFaultyDisks(inst, inst_node_name)
12366 def Exec(self, feedback_fn):
12367 feedback_fn("Repairing storage unit '%s' on %s ..." %
12368 (self.op.name, self.op.node_name))
12370 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12371 result = self.rpc.call_storage_execute(self.op.node_name,
12372 self.op.storage_type, st_args,
12373 self.op.name,
12374 constants.SO_FIX_CONSISTENCY)
12375 result.Raise("Failed to repair storage unit '%s' on %s" %
12376 (self.op.name, self.op.node_name))
12379 class LUNodeEvacuate(NoHooksLU):
12380 """Evacuates instances off a list of nodes.
12385 _MODE2IALLOCATOR = {
12386 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12387 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12388 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12390 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12391 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12392 constants.IALLOCATOR_NEVAC_MODES)
12394 def CheckArguments(self):
12395 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12397 def ExpandNames(self):
12398 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12400 if self.op.remote_node is not None:
12401 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12402 assert self.op.remote_node
12404 if self.op.remote_node == self.op.node_name:
12405 raise errors.OpPrereqError("Can not use evacuated node as a new"
12406 " secondary node", errors.ECODE_INVAL)
12408 if self.op.mode != constants.NODE_EVAC_SEC:
12409 raise errors.OpPrereqError("Without the use of an iallocator only"
12410 " secondary instances can be evacuated",
12411 errors.ECODE_INVAL)
12414 self.share_locks = _ShareAll()
12415 self.needed_locks = {
12416 locking.LEVEL_INSTANCE: [],
12417 locking.LEVEL_NODEGROUP: [],
12418 locking.LEVEL_NODE: [],
12419 }
12421 # Determine nodes (via group) optimistically, needs verification once locks
12422 # have been acquired
12423 self.lock_nodes = self._DetermineNodes()
12425 def _DetermineNodes(self):
12426 """Gets the list of nodes to operate on.
12429 if self.op.remote_node is None:
12430 # Iallocator will choose any node(s) in the same group
12431 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12432 else:
12433 group_nodes = frozenset([self.op.remote_node])
12435 # Determine nodes to be locked
12436 return set([self.op.node_name]) | group_nodes
12438 def _DetermineInstances(self):
12439 """Builds list of instances to operate on.
12442 assert self.op.mode in constants.NODE_EVAC_MODES
12444 if self.op.mode == constants.NODE_EVAC_PRI:
12445 # Primary instances only
12446 inst_fn = _GetNodePrimaryInstances
12447 assert self.op.remote_node is None, \
12448 "Evacuating primary instances requires iallocator"
12449 elif self.op.mode == constants.NODE_EVAC_SEC:
12450 # Secondary instances only
12451 inst_fn = _GetNodeSecondaryInstances
12453 else:
12454 assert self.op.mode == constants.NODE_EVAC_ALL
12455 inst_fn = _GetNodeInstances
12456 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12457 # per instance
12458 raise errors.OpPrereqError("Due to an issue with the iallocator"
12459 " interface it is not possible to evacuate"
12460 " all instances at once; specify explicitly"
12461 " whether to evacuate primary or secondary"
12462 " instances",
12463 errors.ECODE_INVAL)
12465 return inst_fn(self.cfg, self.op.node_name)
12467 def DeclareLocks(self, level):
12468 if level == locking.LEVEL_INSTANCE:
12469 # Lock instances optimistically, needs verification once node and group
12470 # locks have been acquired
12471 self.needed_locks[locking.LEVEL_INSTANCE] = \
12472 set(i.name for i in self._DetermineInstances())
12474 elif level == locking.LEVEL_NODEGROUP:
12475 # Lock node groups for all potential target nodes optimistically, needs
12476 # verification once nodes have been acquired
12477 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12478 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12480 elif level == locking.LEVEL_NODE:
12481 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12483 def CheckPrereq(self):
12485 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12486 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12487 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12489 need_nodes = self._DetermineNodes()
12491 if not owned_nodes.issuperset(need_nodes):
12492 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12493 " locks were acquired, current nodes are"
12494 " are '%s', used to be '%s'; retry the"
12496 (self.op.node_name,
12497 utils.CommaJoin(need_nodes),
12498 utils.CommaJoin(owned_nodes)),
12499 errors.ECODE_STATE)
12501 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12502 if owned_groups != wanted_groups:
12503 raise errors.OpExecError("Node groups changed since locks were acquired,"
12504 " current groups are '%s', used to be '%s';"
12505 " retry the operation" %
12506 (utils.CommaJoin(wanted_groups),
12507 utils.CommaJoin(owned_groups)))
12509 # Determine affected instances
12510 self.instances = self._DetermineInstances()
12511 self.instance_names = [i.name for i in self.instances]
12513 if set(self.instance_names) != owned_instances:
12514 raise errors.OpExecError("Instances on node '%s' changed since locks"
12515 " were acquired, current instances are '%s',"
12516 " used to be '%s'; retry the operation" %
12517 (self.op.node_name,
12518 utils.CommaJoin(self.instance_names),
12519 utils.CommaJoin(owned_instances)))
12521 if self.instance_names:
12522 self.LogInfo("Evacuating instances from node '%s': %s",
12523 self.op.node_name,
12524 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12526 self.LogInfo("No instances to evacuate from node '%s'",
12529 if self.op.remote_node is not None:
12530 for i in self.instances:
12531 if i.primary_node == self.op.remote_node:
12532 raise errors.OpPrereqError("Node %s is the primary node of"
12533 " instance %s, cannot use it as"
12534 " a new secondary" %
12535 (self.op.remote_node, i.name),
12536 errors.ECODE_INVAL)
12538 def Exec(self, feedback_fn):
12539 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12541 if not self.instance_names:
12542 # No instances to evacuate
12543 jobs = []
12545 elif self.op.iallocator is not None:
12546 # TODO: Implement relocation to other group
12547 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12548 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12549 instances=list(self.instance_names))
12550 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12552 ial.Run(self.op.iallocator)
12554 if not ial.success:
12555 raise errors.OpPrereqError("Can't compute node evacuation using"
12556 " iallocator '%s': %s" %
12557 (self.op.iallocator, ial.info),
12558 errors.ECODE_NORES)
12560 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12562 elif self.op.remote_node is not None:
12563 assert self.op.mode == constants.NODE_EVAC_SEC
12564 jobs = [
12565 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12566 remote_node=self.op.remote_node,
12567 disks=[],
12568 mode=constants.REPLACE_DISK_CHG,
12569 early_release=self.op.early_release)]
12570 for instance_name in self.instance_names]
12573 raise errors.ProgrammerError("No iallocator or remote node")
12575 return ResultWithJobs(jobs)
12578 def _SetOpEarlyRelease(early_release, op):
12579 """Sets C{early_release} flag on opcodes if available.
12582 try:
12583 op.early_release = early_release
12584 except AttributeError:
12585 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12587 return op
12590 def _NodeEvacDest(use_nodes, group, nodes):
12591 """Returns group or nodes depending on caller's choice.
12594 if use_nodes:
12595 return utils.CommaJoin(nodes)
12596 else:
12597 return group
12600 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12601 """Unpacks the result of change-group and node-evacuate iallocator requests.
12603 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12604 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12606 @type lu: L{LogicalUnit}
12607 @param lu: Logical unit instance
12608 @type alloc_result: tuple/list
12609 @param alloc_result: Result from iallocator
12610 @type early_release: bool
12611 @param early_release: Whether to release locks early if possible
12612 @type use_nodes: bool
12613 @param use_nodes: Whether to display node names instead of groups
12616 (moved, failed, jobs) = alloc_result
12618 if failed:
12619 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12620 for (name, reason) in failed)
12621 lu.LogWarning("Unable to evacuate instances %s", failreason)
12622 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12625 lu.LogInfo("Instances to be moved: %s",
12626 utils.CommaJoin("%s (to %s)" %
12627 (name, _NodeEvacDest(use_nodes, group, nodes))
12628 for (name, group, nodes) in moved))
12630 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12631 map(opcodes.OpCode.LoadOpCode, ops))
12632 for ops in jobs]
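# Illustrative alloc_result shape (values made up):
#   moved  == [("inst1", "group1", ["node3"])]
#   failed == [("inst2", "not enough memory")]
#   jobs   == [[<serialized opcode dict>, ...], ...]
# i.e. one inner list of serialized opcodes per job to be submitted.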
12635 def _DiskSizeInBytesToMebibytes(lu, size):
12636 """Converts a disk size in bytes to mebibytes.
12638 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12641 (mib, remainder) = divmod(size, 1024 * 1024)
12643 if remainder != 0:
12644 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12645 " to not overwrite existing data (%s bytes will not be"
12646 " wiped)", (1024 * 1024) - remainder)
12647 mib += 1
12649 return mib
12652 class LUInstanceGrowDisk(LogicalUnit):
12653 """Grow a disk of an instance.
12656 HPATH = "disk-grow"
12657 HTYPE = constants.HTYPE_INSTANCE
12658 REQ_BGL = False
12660 def ExpandNames(self):
12661 self._ExpandAndLockInstance()
12662 self.needed_locks[locking.LEVEL_NODE] = []
12663 self.needed_locks[locking.LEVEL_NODE_RES] = []
12664 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12665 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12667 def DeclareLocks(self, level):
12668 if level == locking.LEVEL_NODE:
12669 self._LockInstancesNodes()
12670 elif level == locking.LEVEL_NODE_RES:
12672 self.needed_locks[locking.LEVEL_NODE_RES] = \
12673 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12675 def BuildHooksEnv(self):
12676 """Build hooks env.
12678 This runs on the master, the primary and all the secondaries.
12682 "DISK": self.op.disk,
12683 "AMOUNT": self.op.amount,
12684 "ABSOLUTE": self.op.absolute,
12685 }
12686 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12687 return env
12689 def BuildHooksNodes(self):
12690 """Build hooks nodes.
12692 """
12693 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12694 return (nl, nl)
12696 def CheckPrereq(self):
12697 """Check prerequisites.
12699 This checks that the instance is in the cluster.
12701 """
12702 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12703 assert instance is not None, \
12704 "Cannot retrieve locked instance %s" % self.op.instance_name
12705 nodenames = list(instance.all_nodes)
12706 for node in nodenames:
12707 _CheckNodeOnline(self, node)
12709 self.instance = instance
12711 if instance.disk_template not in constants.DTS_GROWABLE:
12712 raise errors.OpPrereqError("Instance's disk layout does not support"
12713 " growing", errors.ECODE_INVAL)
12715 self.disk = instance.FindDisk(self.op.disk)
12717 if self.op.absolute:
12718 self.target = self.op.amount
12719 self.delta = self.target - self.disk.size
12720 if self.delta < 0:
12721 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12722 "current disk size (%s)" %
12723 (utils.FormatUnit(self.target, "h"),
12724 utils.FormatUnit(self.disk.size, "h")),
12725 errors.ECODE_STATE)
12726 else:
12727 self.delta = self.op.amount
12728 self.target = self.disk.size + self.delta
12729 if self.delta < 0:
12730 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12731 utils.FormatUnit(self.delta, "h"),
12732 errors.ECODE_INVAL)
12734 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
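# Worked example (illustrative, not part of the original source): for a disk
# currently sized 10240 MiB, an absolute request of 15360 yields
# target = 15360 and delta = 5120, while a relative request of 5120 computes
# the same target from the delta.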
12736 def _CheckDiskSpace(self, nodenames, req_vgspace):
12737 template = self.instance.disk_template
12738 if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
12739 # TODO: check the free disk space for file, when that feature will be
12740 # supported
12741 nodes = map(self.cfg.GetNodeInfo, nodenames)
12742 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12743 nodes)
12744 if es_nodes:
12745 # With exclusive storage we need to do something smarter than just
12746 # looking at free space; for now, let's simply abort the operation.
12747 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12748 " is enabled", errors.ECODE_STATE)
12749 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12751 def Exec(self, feedback_fn):
12752 """Execute disk grow.
12754 """
12755 instance = self.instance
12757 disk = self.disk
12758 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12759 assert (self.owned_locks(locking.LEVEL_NODE) ==
12760 self.owned_locks(locking.LEVEL_NODE_RES))
12762 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12764 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12765 if not disks_ok:
12766 raise errors.OpExecError("Cannot activate block device to grow")
12768 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12769 (self.op.disk, instance.name,
12770 utils.FormatUnit(self.delta, "h"),
12771 utils.FormatUnit(self.target, "h")))
12773 # First run all grow ops in dry-run mode
12774 for node in instance.all_nodes:
12775 self.cfg.SetDiskID(disk, node)
12776 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12777 True, True)
12778 result.Raise("Dry-run grow request failed to node %s" % node)
12780 if wipe_disks:
12781 # Get disk size from primary node for wiping
12782 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12783 result.Raise("Failed to retrieve disk size from node '%s'" %
12784 instance.primary_node)
12786 (disk_size_in_bytes, ) = result.payload
12788 if disk_size_in_bytes is None:
12789 raise errors.OpExecError("Failed to retrieve disk size from primary"
12790 " node '%s'" % instance.primary_node)
12792 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12794 assert old_disk_size >= disk.size, \
12795 ("Retrieved disk size too small (got %s, should be at least %s)" %
12796 (old_disk_size, disk.size))
12797 else:
12798 old_disk_size = None
12800 # We know that (as far as we can test) operations across different
12801 # nodes will succeed, time to run it for real on the backing storage
12802 for node in instance.all_nodes:
12803 self.cfg.SetDiskID(disk, node)
12804 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12805 False, True)
12806 result.Raise("Grow request failed to node %s" % node)
12808 # And now execute it for logical storage, on the primary node
12809 node = instance.primary_node
12810 self.cfg.SetDiskID(disk, node)
12811 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12812 False, False)
12813 result.Raise("Grow request failed to node %s" % node)
12815 disk.RecordGrow(self.delta)
12816 self.cfg.Update(instance, feedback_fn)
12818 # Changes have been recorded, release node lock
12819 _ReleaseLocks(self, locking.LEVEL_NODE)
12821 # Downgrade lock while waiting for sync
12822 self.glm.downgrade(locking.LEVEL_INSTANCE)
12824 assert wipe_disks ^ (old_disk_size is None)
12826 if wipe_disks:
12827 assert instance.disks[self.op.disk] == disk
12829 # Wipe newly added disk space
12830 _WipeDisks(self, instance,
12831 disks=[(self.op.disk, disk, old_disk_size)])
12833 if self.op.wait_for_sync:
12834 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12835 if disk_abort:
12836 self.LogWarning("Disk syncing has not returned a good status; check"
12837 " the instance")
12838 if instance.admin_state != constants.ADMINST_UP:
12839 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12840 elif instance.admin_state != constants.ADMINST_UP:
12841 self.LogWarning("Not shutting down the disk even if the instance is"
12842 " not supposed to be running because no wait for"
12843 " sync mode was requested")
12845 assert self.owned_locks(locking.LEVEL_NODE_RES)
12846 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12849 class LUInstanceQueryData(NoHooksLU):
12850 """Query runtime instance data.
12855 def ExpandNames(self):
12856 self.needed_locks = {}
12858 # Use locking if requested or when non-static information is wanted
12859 if not (self.op.static or self.op.use_locking):
12860 self.LogWarning("Non-static data requested, locks need to be acquired")
12861 self.op.use_locking = True
12863 if self.op.instances or not self.op.use_locking:
12864 # Expand instance names right here
12865 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12866 else:
12867 # Will use acquired locks
12868 self.wanted_names = None
12870 if self.op.use_locking:
12871 self.share_locks = _ShareAll()
12873 if self.wanted_names is None:
12874 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12875 else:
12876 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12878 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12879 self.needed_locks[locking.LEVEL_NODE] = []
12880 self.needed_locks[locking.LEVEL_NETWORK] = []
12881 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12883 def DeclareLocks(self, level):
12884 if self.op.use_locking:
12885 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12886 if level == locking.LEVEL_NODEGROUP:
12888 # Lock all groups used by instances optimistically; this requires going
12889 # via the node before it's locked, requiring verification later on
12890 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12891 frozenset(group_uuid
12892 for instance_name in owned_instances
12893 for group_uuid in
12894 self.cfg.GetInstanceNodeGroups(instance_name))
12896 elif level == locking.LEVEL_NODE:
12897 self._LockInstancesNodes()
12899 elif level == locking.LEVEL_NETWORK:
12900 self.needed_locks[locking.LEVEL_NETWORK] = \
12901 frozenset(net_uuid
12902 for instance_name in owned_instances
12903 for net_uuid in
12904 self.cfg.GetInstanceNetworks(instance_name))
12906 def CheckPrereq(self):
12907 """Check prerequisites.
12909 This only checks the optional instance list against the existing names.
12911 """
12912 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12913 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12914 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12915 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12917 if self.wanted_names is None:
12918 assert self.op.use_locking, "Locking was not used"
12919 self.wanted_names = owned_instances
12921 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12923 if self.op.use_locking:
12924 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12925 None)
12926 else:
12927 assert not (owned_instances or owned_groups or
12928 owned_nodes or owned_networks)
12930 self.wanted_instances = instances.values()
12932 def _ComputeBlockdevStatus(self, node, instance, dev):
12933 """Returns the status of a block device
12935 """
12936 if self.op.static or not node:
12937 return None
12939 self.cfg.SetDiskID(dev, node)
12941 result = self.rpc.call_blockdev_find(node, dev)
12942 if result.offline:
12943 return None
12945 result.Raise("Can't compute disk status for %s" % instance.name)
12947 status = result.payload
12948 if status is None:
12949 return None
12951 return (status.dev_path, status.major, status.minor,
12952 status.sync_percent, status.estimated_time,
12953 status.is_degraded, status.ldisk_status)
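# Example (illustrative): for a healthy, fully synced device the tuple built
# above might look like ("/dev/drbd0", 147, 0, 100.0, None, False, None),
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status).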
12955 def _ComputeDiskStatus(self, instance, snode, dev):
12956 """Compute block device status.
12958 """
12959 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12961 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12963 def _ComputeDiskStatusInner(self, instance, snode, dev):
12964 """Compute block device status.
12966 @attention: The device has to be annotated already.
12968 """
12969 if dev.dev_type in constants.LDS_DRBD:
12970 # we change the snode then (otherwise we use the one passed in)
12971 if dev.logical_id[0] == instance.primary_node:
12972 snode = dev.logical_id[1]
12973 else:
12974 snode = dev.logical_id[0]
12976 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12977 instance, dev)
12978 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12980 if dev.children:
12981 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12982 instance, snode),
12983 dev.children)
12984 else:
12985 dev_children = []
12987 return {
12988 "iv_name": dev.iv_name,
12989 "dev_type": dev.dev_type,
12990 "logical_id": dev.logical_id,
12991 "physical_id": dev.physical_id,
12992 "pstatus": dev_pstatus,
12993 "sstatus": dev_sstatus,
12994 "children": dev_children,
12999 def Exec(self, feedback_fn):
13000 """Gather and return data"""
13003 cluster = self.cfg.GetClusterInfo()
13005 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
13006 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
13008 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
13009 for node in nodes.values()))
13011 group2name_fn = lambda uuid: groups[uuid].name
13012 for instance in self.wanted_instances:
13013 pnode = nodes[instance.primary_node]
13015 if self.op.static or pnode.offline:
13016 remote_state = None
13017 if pnode.offline:
13018 self.LogWarning("Primary node %s is marked offline, returning static"
13019 " information only for instance %s" %
13020 (pnode.name, instance.name))
13021 else:
13022 remote_info = self.rpc.call_instance_info(instance.primary_node,
13023 instance.name,
13024 instance.hypervisor)
13025 remote_info.Raise("Error checking node %s" % instance.primary_node)
13026 remote_info = remote_info.payload
13027 if remote_info and "state" in remote_info:
13028 remote_state = "up"
13029 else:
13030 if instance.admin_state == constants.ADMINST_UP:
13031 remote_state = "down"
13032 else:
13033 remote_state = instance.admin_state
13035 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
13036 instance.disks)
13038 snodes_group_uuids = [nodes[snode_name].group
13039 for snode_name in instance.secondary_nodes]
13041 result[instance.name] = {
13042 "name": instance.name,
13043 "config_state": instance.admin_state,
13044 "run_state": remote_state,
13045 "pnode": instance.primary_node,
13046 "pnode_group_uuid": pnode.group,
13047 "pnode_group_name": group2name_fn(pnode.group),
13048 "snodes": instance.secondary_nodes,
13049 "snodes_group_uuids": snodes_group_uuids,
13050 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
13051 "os": instance.os,
13052 # this happens to be the same format used for hooks
13053 "nics": _NICListToTuple(self, instance.nics),
13054 "disk_template": instance.disk_template,
13056 "hypervisor": instance.hypervisor,
13057 "network_port": instance.network_port,
13058 "hv_instance": instance.hvparams,
13059 "hv_actual": cluster.FillHV(instance, skip_globals=True),
13060 "be_instance": instance.beparams,
13061 "be_actual": cluster.FillBE(instance),
13062 "os_instance": instance.osparams,
13063 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
13064 "serial_no": instance.serial_no,
13065 "mtime": instance.mtime,
13066 "ctime": instance.ctime,
13067 "uuid": instance.uuid,
13073 def PrepareContainerMods(mods, private_fn):
13074 """Prepares a list of container modifications by adding a private data field.
13076 @type mods: list of tuples; (operation, index, parameters)
13077 @param mods: List of modifications
13078 @type private_fn: callable or None
13079 @param private_fn: Callable for constructing a private data field for a
13080 modification
13081 @rtype: list
13083 """
13084 if private_fn is None:
13085 fn = lambda: None
13086 else:
13087 fn = private_fn
13089 return [(op, idx, params, fn()) for (op, idx, params) in mods]
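# Example (hedged sketch): with private_fn=None each modification simply
# gains a None private field:
#
#   mods = [(constants.DDM_ADD, -1, {"size": 1024})]
#   PrepareContainerMods(mods, None)
#   # => [(constants.DDM_ADD, -1, {"size": 1024}, None)]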
13092 #: Type description for changes as returned by L{ApplyContainerMods}'s
13093 #: callbacks
13094 _TApplyContModsCbChanges = \
13095 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13096 ht.TNonEmptyString,
13097 ht.TAny,
13098 ])))
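# Example (illustrative): values accepted by this check include None and
# lists of 2-tuples such as [("disk/0", "add:size=1024,mode=rw")].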
13101 def ApplyContainerMods(kind, container, chgdesc, mods,
13102 create_fn, modify_fn, remove_fn):
13103 """Applies descriptions in C{mods} to C{container}.
13105 @type kind: string
13106 @param kind: One-word item description
13107 @type container: list
13108 @param container: Container to modify
13109 @type chgdesc: None or list
13110 @param chgdesc: List of applied changes
13111 @type mods: list
13112 @param mods: Modifications as returned by L{PrepareContainerMods}
13113 @type create_fn: callable
13114 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13115 receives absolute item index, parameters and private data object as added
13116 by L{PrepareContainerMods}, returns tuple containing new item and changes
13117 applied to it
13118 @type modify_fn: callable
13119 @param modify_fn: Callback for modifying an existing item
13120 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13121 and private data object as added by L{PrepareContainerMods}, returns
13122 changes applied to it
13123 @type remove_fn: callable
13124 @param remove_fn: Callback on removing item; receives absolute item index,
13125 item and private data object as added by L{PrepareContainerMods}
13127 """
13128 for (op, idx, params, private) in mods:
13129 if idx == -1:
13130 # Append
13131 absidx = len(container) - 1
13132 elif idx < 0:
13133 raise IndexError("Not accepting negative indices other than -1")
13134 elif idx > len(container):
13135 raise IndexError("Got %s index %s, but there are only %s" %
13136 (kind, idx, len(container)))
13137 else:
13138 absidx = idx
13140 changes = None
13142 if op == constants.DDM_ADD:
13143 # Calculate where item will be added
13144 if idx == -1:
13145 addidx = len(container)
13146 else:
13147 addidx = idx
13149 if create_fn is None:
13150 item = params
13151 else:
13152 (item, changes) = create_fn(addidx, params, private)
13154 if idx == -1:
13155 container.append(item)
13156 else:
13157 assert idx >= 0
13158 assert idx <= len(container)
13159 # list.insert does so before the specified index
13160 container.insert(idx, item)
13161 else:
13162 # Retrieve existing item
13163 try:
13164 item = container[absidx]
13165 except IndexError:
13166 raise IndexError("Invalid %s index %s" % (kind, idx))
13168 if op == constants.DDM_REMOVE:
13169 assert not params
13171 if remove_fn is not None:
13172 remove_fn(absidx, item, private)
13174 changes = [("%s/%s" % (kind, absidx), "remove")]
13176 assert container[absidx] == item
13177 del container[absidx]
13178 elif op == constants.DDM_MODIFY:
13179 if modify_fn is not None:
13180 changes = modify_fn(absidx, item, params, private)
13181 else:
13182 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13184 assert _TApplyContModsCbChanges(changes)
13186 if not (chgdesc is None or changes is None):
13187 chgdesc.extend(changes)
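# Usage sketch (illustrative, with a hypothetical create callback): appending
# an item at index -1 while recording change descriptions:
#
#   def _demo_create(absidx, params, private):
#     return (params["value"], [("demo/%s" % absidx, "add")])
#
#   container = ["a"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"value": "b"})],
#                               None)
#   ApplyContainerMods("demo", container, chgdesc, mods,
#                      _demo_create, None, None)
#   # container == ["a", "b"], chgdesc == [("demo/1", "add")]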
13190 def _UpdateIvNames(base_index, disks):
13191 """Updates the C{iv_name} attribute of disks.
13193 @type disks: list of L{objects.Disk}
13195 """
13196 for (idx, disk) in enumerate(disks):
13197 disk.iv_name = "disk/%s" % (base_index + idx, )
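# Example (illustrative): LUInstanceSetParams.Exec calls
# _UpdateIvNames(0, instance.disks) after applying disk modifications, so a
# three-disk instance ends up with iv_name values "disk/0", "disk/1" and
# "disk/2".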
13200 class _InstNicModPrivate:
13201 """Data structure for network interface modifications.
13203 Used by L{LUInstanceSetParams}.
13205 """
13206 def __init__(self):
13207 self.params = None
13208 self.filled = None
13211 class LUInstanceSetParams(LogicalUnit):
13212 """Modifies an instances's parameters.
13215 HPATH = "instance-modify"
13216 HTYPE = constants.HTYPE_INSTANCE
13217 REQ_BGL = False
13219 @staticmethod
13220 def _UpgradeDiskNicMods(kind, mods, verify_fn):
13221 assert ht.TList(mods)
13222 assert not mods or len(mods[0]) in (2, 3)
13224 if mods and len(mods[0]) == 2:
13225 result = []
13227 addremove = 0
13228 for op, params in mods:
13229 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13230 result.append((op, -1, params))
13231 addremove += 1
13233 if addremove > 1:
13234 raise errors.OpPrereqError("Only one %s add or remove operation is"
13235 " supported at a time" % kind,
13236 errors.ECODE_INVAL)
13237 else:
13238 result.append((constants.DDM_MODIFY, op, params))
13240 assert verify_fn(result)
13241 else:
13242 result = mods
13244 return result
13246 @staticmethod
13247 def _CheckMods(kind, mods, key_types, item_fn):
13248 """Ensures requested disk/NIC modifications are valid.
13250 """
13251 for (op, _, params) in mods:
13252 assert ht.TDict(params)
13254 # If 'key_types' is an empty dict, we assume we have an
13255 # 'ext' template and thus do not ForceDictType
13256 if key_types:
13257 utils.ForceDictType(params, key_types)
13259 if op == constants.DDM_REMOVE:
13261 raise errors.OpPrereqError("No settings should be passed when"
13262 " removing a %s" % kind,
13263 errors.ECODE_INVAL)
13264 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13265 item_fn(op, params)
13267 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13269 @staticmethod
13270 def _VerifyDiskModification(op, params):
13271 """Verifies a disk modification.
13273 """
13274 if op == constants.DDM_ADD:
13275 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13276 if mode not in constants.DISK_ACCESS_SET:
13277 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13278 errors.ECODE_INVAL)
13280 size = params.get(constants.IDISK_SIZE, None)
13281 if size is None:
13282 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13283 constants.IDISK_SIZE, errors.ECODE_INVAL)
13285 try:
13286 size = int(size)
13287 except (TypeError, ValueError), err:
13288 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13289 errors.ECODE_INVAL)
13291 params[constants.IDISK_SIZE] = size
13293 elif op == constants.DDM_MODIFY:
13294 if constants.IDISK_SIZE in params:
13295 raise errors.OpPrereqError("Disk size change not possible, use"
13296 " grow-disk", errors.ECODE_INVAL)
13297 if constants.IDISK_MODE not in params:
13298 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13299 " modification supported, but missing",
13300 errors.ECODE_NOENT)
13301 if len(params) > 1:
13302 raise errors.OpPrereqError("Disk modification doesn't support"
13303 " additional arbitrary parameters",
13304 errors.ECODE_INVAL)
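# Example (hedged): a minimal DDM_ADD parameter dict after verification, with
# the access mode defaulted and the size normalized to an integer:
#
#   params = {constants.IDISK_SIZE: "1024"}
#   LUInstanceSetParams._VerifyDiskModification(constants.DDM_ADD, params)
#   # params == {constants.IDISK_SIZE: 1024,
#   #            constants.IDISK_MODE: constants.DISK_RDWR}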
13306 @staticmethod
13307 def _VerifyNicModification(op, params):
13308 """Verifies a network interface modification.
13310 """
13311 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13312 ip = params.get(constants.INIC_IP, None)
13313 req_net = params.get(constants.INIC_NETWORK, None)
13314 link = params.get(constants.NIC_LINK, None)
13315 mode = params.get(constants.NIC_MODE, None)
13316 if req_net is not None:
13317 if req_net.lower() == constants.VALUE_NONE:
13318 params[constants.INIC_NETWORK] = None
13319 req_net = None
13320 elif link is not None or mode is not None:
13321 raise errors.OpPrereqError("If network is given"
13322 " mode or link should not",
13323 errors.ECODE_INVAL)
13325 if op == constants.DDM_ADD:
13326 macaddr = params.get(constants.INIC_MAC, None)
13327 if macaddr is None:
13328 params[constants.INIC_MAC] = constants.VALUE_AUTO
13330 if ip is not None:
13331 if ip.lower() == constants.VALUE_NONE:
13332 params[constants.INIC_IP] = None
13333 else:
13334 if ip.lower() == constants.NIC_IP_POOL:
13335 if op == constants.DDM_ADD and req_net is None:
13336 raise errors.OpPrereqError("If ip=pool, parameter network"
13337 " must be passed too",
13338 errors.ECODE_INVAL)
13340 elif not netutils.IPAddress.IsValid(ip):
13341 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13342 errors.ECODE_INVAL)
13344 if constants.INIC_MAC in params:
13345 macaddr = params[constants.INIC_MAC]
13346 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13347 macaddr = utils.NormalizeAndValidateMac(macaddr)
13349 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13350 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13351 " modifying an existing NIC",
13352 errors.ECODE_INVAL)
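# Example (hedged): adding a NIC with ip=pool requires a network; the
# following passes verification and defaults the MAC to "auto":
#
#   params = {constants.INIC_IP: constants.NIC_IP_POOL,
#             constants.INIC_NETWORK: "mynet"}
#   LUInstanceSetParams._VerifyNicModification(constants.DDM_ADD, params)
#   # params[constants.INIC_MAC] == constants.VALUE_AUTO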
13354 def CheckArguments(self):
13355 if not (self.op.nics or self.op.disks or self.op.disk_template or
13356 self.op.hvparams or self.op.beparams or self.op.os_name or
13357 self.op.offline is not None or self.op.runtime_mem or
13359 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13361 if self.op.hvparams:
13362 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13363 "hypervisor", "instance", "cluster")
13365 self.op.disks = self._UpgradeDiskNicMods(
13366 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13367 self.op.nics = self._UpgradeDiskNicMods(
13368 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13370 if self.op.disks and self.op.disk_template is not None:
13371 raise errors.OpPrereqError("Disk template conversion and other disk"
13372 " changes not supported at the same time",
13373 errors.ECODE_INVAL)
13375 if (self.op.disk_template and
13376 self.op.disk_template in constants.DTS_INT_MIRROR and
13377 self.op.remote_node is None):
13378 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13379 " one requires specifying a secondary node",
13380 errors.ECODE_INVAL)
13382 # Check NIC modifications
13383 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13384 self._VerifyNicModification)
13386 if self.op.pnode:
13387 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
13389 def ExpandNames(self):
13390 self._ExpandAndLockInstance()
13391 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13392 # Can't even acquire node locks in shared mode as upcoming changes in
13393 # Ganeti 2.6 will start to modify the node object on disk conversion
13394 self.needed_locks[locking.LEVEL_NODE] = []
13395 self.needed_locks[locking.LEVEL_NODE_RES] = []
13396 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13397 # Lock the node group (shared) to be able to look up the ipolicy
13398 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13400 def DeclareLocks(self, level):
13401 if level == locking.LEVEL_NODEGROUP:
13402 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13403 # Acquire locks for the instance's nodegroups optimistically. Needs
13404 # to be verified in CheckPrereq
13405 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13406 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13407 elif level == locking.LEVEL_NODE:
13408 self._LockInstancesNodes()
13409 if self.op.disk_template and self.op.remote_node:
13410 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13411 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13412 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13413 # Copy node locks
13414 self.needed_locks[locking.LEVEL_NODE_RES] = \
13415 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13417 def BuildHooksEnv(self):
13418 """Build hooks env.
13420 This runs on the master, primary and secondaries.
13422 """
13423 args = {}
13424 if constants.BE_MINMEM in self.be_new:
13425 args["minmem"] = self.be_new[constants.BE_MINMEM]
13426 if constants.BE_MAXMEM in self.be_new:
13427 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13428 if constants.BE_VCPUS in self.be_new:
13429 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13430 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13431 # information at all.
13433 if self._new_nics is not None:
13434 nics = []
13436 for nic in self._new_nics:
13437 n = copy.deepcopy(nic)
13438 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13439 n.nicparams = nicparams
13440 nics.append(_NICToTuple(self, n))
13442 args["nics"] = nics
13444 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13445 if self.op.disk_template:
13446 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13447 if self.op.runtime_mem:
13448 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13452 def BuildHooksNodes(self):
13453 """Build hooks nodes.
13455 """
13456 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13457 return (nl, nl)
13459 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13460 old_params, cluster, pnode):
13462 update_params_dict = dict([(key, params[key])
13463 for key in constants.NICS_PARAMETERS
13464 if key in params])
13466 req_link = update_params_dict.get(constants.NIC_LINK, None)
13467 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13469 new_net_uuid = None
13470 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13471 if new_net_uuid_or_name:
13472 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13473 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13475 if old_net_uuid:
13476 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13478 if new_net_uuid:
13479 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13480 if not netparams:
13481 raise errors.OpPrereqError("No netparams found for the network"
13482 " %s, probably not connected" %
13483 new_net_obj.name, errors.ECODE_INVAL)
13484 new_params = dict(netparams)
13485 else:
13486 new_params = _GetUpdatedParams(old_params, update_params_dict)
13488 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13490 new_filled_params = cluster.SimpleFillNIC(new_params)
13491 objects.NIC.CheckParameterSyntax(new_filled_params)
13493 new_mode = new_filled_params[constants.NIC_MODE]
13494 if new_mode == constants.NIC_MODE_BRIDGED:
13495 bridge = new_filled_params[constants.NIC_LINK]
13496 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13498 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13500 self.warn.append(msg)
13502 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13504 elif new_mode == constants.NIC_MODE_ROUTED:
13505 ip = params.get(constants.INIC_IP, old_ip)
13507 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13508 " on a routed NIC", errors.ECODE_INVAL)
13510 elif new_mode == constants.NIC_MODE_OVS:
13511 # TODO: check OVS link
13512 self.LogInfo("OVS links are currently not checked for correctness")
13514 if constants.INIC_MAC in params:
13515 mac = params[constants.INIC_MAC]
13516 if mac is None:
13517 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13518 errors.ECODE_INVAL)
13519 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13520 # otherwise generate the MAC address
13521 params[constants.INIC_MAC] = \
13522 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13523 else:
13524 # or validate/reserve the current one
13525 try:
13526 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13527 except errors.ReservationError:
13528 raise errors.OpPrereqError("MAC address '%s' already in use"
13529 " in cluster" % mac,
13530 errors.ECODE_NOTUNIQUE)
13531 elif new_net_uuid != old_net_uuid:
13533 def get_net_prefix(net_uuid):
13534 mac_prefix = None
13535 if net_uuid:
13536 nobj = self.cfg.GetNetwork(net_uuid)
13537 mac_prefix = nobj.mac_prefix
13539 return mac_prefix
13541 new_prefix = get_net_prefix(new_net_uuid)
13542 old_prefix = get_net_prefix(old_net_uuid)
13543 if old_prefix != new_prefix:
13544 params[constants.INIC_MAC] = \
13545 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13547 # if there is a change in (ip, network) tuple
13548 new_ip = params.get(constants.INIC_IP, old_ip)
13549 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13550 if new_ip:
13551 # if IP is pool then require a network and generate one IP
13552 if new_ip.lower() == constants.NIC_IP_POOL:
13553 if new_net_uuid:
13554 try:
13555 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13556 except errors.ReservationError:
13557 raise errors.OpPrereqError("Unable to get a free IP"
13558 " from the address pool",
13559 errors.ECODE_STATE)
13560 self.LogInfo("Chose IP %s from network %s",
13561 new_ip,
13562 new_net_obj.name)
13563 params[constants.INIC_IP] = new_ip
13564 else:
13565 raise errors.OpPrereqError("ip=pool, but no network found",
13566 errors.ECODE_INVAL)
13567 # Reserve the new IP in the new network, if any
13568 elif new_net_uuid:
13569 try:
13570 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13571 self.LogInfo("Reserving IP %s in network %s",
13572 new_ip, new_net_obj.name)
13573 except errors.ReservationError:
13574 raise errors.OpPrereqError("IP %s not available in network %s" %
13575 (new_ip, new_net_obj.name),
13576 errors.ECODE_NOTUNIQUE)
13577 # new network is None so check if new IP is a conflicting IP
13578 elif self.op.conflicts_check:
13579 _CheckForConflictingIp(self, new_ip, pnode)
13581 # release old IP if old network is not None
13582 if old_ip and old_net_uuid:
13583 try:
13584 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13585 except errors.AddressPoolError:
13586 logging.warning("Release IP %s not contained in network %s",
13587 old_ip, old_net_obj.name)
13589 # there are no changes in (ip, network) tuple and old network is not None
13590 elif (old_net_uuid is not None and
13591 (req_link is not None or req_mode is not None)):
13592 raise errors.OpPrereqError("Not allowed to change link or mode of"
13593 " a NIC that is connected to a network",
13594 errors.ECODE_INVAL)
13596 private.params = new_params
13597 private.filled = new_filled_params
13599 def _PreCheckDiskTemplate(self, pnode_info):
13600 """CheckPrereq checks related to a new disk template."""
13601 # Arguments are passed to avoid configuration lookups
13602 instance = self.instance
13603 pnode = instance.primary_node
13604 cluster = self.cluster
13605 if instance.disk_template == self.op.disk_template:
13606 raise errors.OpPrereqError("Instance already has disk template %s" %
13607 instance.disk_template, errors.ECODE_INVAL)
13609 if (instance.disk_template,
13610 self.op.disk_template) not in self._DISK_CONVERSIONS:
13611 raise errors.OpPrereqError("Unsupported disk template conversion from"
13612 " %s to %s" % (instance.disk_template,
13613 self.op.disk_template),
13614 errors.ECODE_INVAL)
13615 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13616 msg="cannot change disk template")
13617 if self.op.disk_template in constants.DTS_INT_MIRROR:
13618 if self.op.remote_node == pnode:
13619 raise errors.OpPrereqError("Given new secondary node %s is the same"
13620 " as the primary node of the instance" %
13621 self.op.remote_node, errors.ECODE_STATE)
13622 _CheckNodeOnline(self, self.op.remote_node)
13623 _CheckNodeNotDrained(self, self.op.remote_node)
13624 # FIXME: here we assume that the old instance type is DT_PLAIN
13625 assert instance.disk_template == constants.DT_PLAIN
13626 disks = [{constants.IDISK_SIZE: d.size,
13627 constants.IDISK_VG: d.logical_id[0]}
13628 for d in instance.disks]
13629 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13630 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13632 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13633 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13634 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13635 snode_group)
13636 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
13637 ignore=self.op.ignore_ipolicy)
13638 if pnode_info.group != snode_info.group:
13639 self.LogWarning("The primary and secondary nodes are in two"
13640 " different node groups; the disk parameters"
13641 " from the first disk's node group will be"
13644 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13645 # Make sure none of the nodes require exclusive storage
13646 nodes = [pnode_info]
13647 if self.op.disk_template in constants.DTS_INT_MIRROR:
13648 assert snode_info
13649 nodes.append(snode_info)
13650 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13651 if compat.any(map(has_es, nodes)):
13652 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13653 " storage is enabled" % (instance.disk_template,
13654 self.op.disk_template))
13655 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13657 def CheckPrereq(self):
13658 """Check prerequisites.
13660 This only checks the instance list against the existing names.
13662 """
13663 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13664 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13666 cluster = self.cluster = self.cfg.GetClusterInfo()
13667 assert self.instance is not None, \
13668 "Cannot retrieve locked instance %s" % self.op.instance_name
13670 pnode = instance.primary_node
13672 self.warn = []
13674 if (self.op.pnode is not None and self.op.pnode != pnode and
13675 not self.op.force):
13676 # verify that the instance is not up
13677 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13678 instance.hypervisor)
13679 if instance_info.fail_msg:
13680 self.warn.append("Can't get instance runtime information: %s" %
13681 instance_info.fail_msg)
13682 elif instance_info.payload:
13683 raise errors.OpPrereqError("Instance is still running on %s" % pnode,
13684 errors.ECODE_STATE)
13686 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13687 nodelist = list(instance.all_nodes)
13688 pnode_info = self.cfg.GetNodeInfo(pnode)
13689 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13691 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13692 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13693 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13695 # dictionary with instance information after the modification
13696 ispec = {}
13698 # Check disk modifications. This is done here and not in CheckArguments
13699 # (as with NICs), because we need to know the instance's disk template
13700 if instance.disk_template == constants.DT_EXT:
13701 self._CheckMods("disk", self.op.disks, {},
13702 self._VerifyDiskModification)
13703 else:
13704 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13705 self._VerifyDiskModification)
13707 # Prepare disk/NIC modifications
13708 self.diskmod = PrepareContainerMods(self.op.disks, None)
13709 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13711 # Check the validity of the `provider' parameter
13712 if instance.disk_template == constants.DT_EXT:
13713 for mod in self.diskmod:
13714 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13715 if mod[0] == constants.DDM_ADD:
13716 if ext_provider is None:
13717 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13718 " '%s' missing, during disk add" %
13719 (constants.DT_EXT,
13720 constants.IDISK_PROVIDER),
13721 errors.ECODE_NOENT)
13722 elif mod[0] == constants.DDM_MODIFY:
13724 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13726 constants.IDISK_PROVIDER,
13727 errors.ECODE_INVAL)
13728 else:
13729 for mod in self.diskmod:
13730 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13731 if ext_provider is not None:
13732 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13733 " instances of type '%s'" %
13734 (constants.IDISK_PROVIDER,
13735 constants.DT_EXT),
13736 errors.ECODE_INVAL)
13738 # OS change
13739 if self.op.os_name and not self.op.force:
13740 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13741 self.op.force_variant)
13742 instance_os = self.op.os_name
13743 else:
13744 instance_os = instance.os
13746 assert not (self.op.disk_template and self.op.disks), \
13747 "Can't modify disk template and apply disk changes at the same time"
13749 if self.op.disk_template:
13750 self._PreCheckDiskTemplate(pnode_info)
13752 # hvparams processing
13753 if self.op.hvparams:
13754 hv_type = instance.hypervisor
13755 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13756 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13757 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13759 # local check
13760 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13761 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13762 self.hv_proposed = self.hv_new = hv_new # the new actual values
13763 self.hv_inst = i_hvdict # the new dict (without defaults)
13764 else:
13765 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13766 instance.hvparams)
13767 self.hv_new = self.hv_inst = {}
13769 # beparams processing
13770 if self.op.beparams:
13771 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13772 use_none=True)
13773 objects.UpgradeBeParams(i_bedict)
13774 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13775 be_new = cluster.SimpleFillBE(i_bedict)
13776 self.be_proposed = self.be_new = be_new # the new actual values
13777 self.be_inst = i_bedict # the new dict (without defaults)
13778 else:
13779 self.be_new = self.be_inst = {}
13780 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13781 be_old = cluster.FillBE(instance)
13783 # CPU param validation -- checking every time a parameter is
13784 # changed to cover all cases where either CPU mask or vcpus have
13785 # changed
13786 if (constants.BE_VCPUS in self.be_proposed and
13787 constants.HV_CPU_MASK in self.hv_proposed):
13788 cpu_list = \
13789 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13790 # Verify mask is consistent with number of vCPUs. Can skip this
13791 # test if only 1 entry in the CPU mask, which means same mask
13792 # is applied to all vCPUs.
13793 if (len(cpu_list) > 1 and
13794 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13795 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13796 " CPU mask [%s]" %
13797 (self.be_proposed[constants.BE_VCPUS],
13798 self.hv_proposed[constants.HV_CPU_MASK]),
13799 errors.ECODE_INVAL)
13801 # Only perform this test if a new CPU mask is given
13802 if constants.HV_CPU_MASK in self.hv_new:
13803 # Calculate the largest CPU number requested
13804 max_requested_cpu = max(map(max, cpu_list))
13805 # Check that all of the instance's nodes have enough physical CPUs to
13806 # satisfy the requested CPU mask
13807 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13808 max_requested_cpu + 1, instance.hypervisor)
13810 # osparams processing
13811 if self.op.osparams:
13812 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13813 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13814 self.os_inst = i_osdict # the new dict (without defaults)
13815 else:
13816 self.os_inst = {}
13818 #TODO(dynmem): do the appropriate check involving MINMEM
13819 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13820 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13821 mem_check_list = [pnode]
13822 if be_new[constants.BE_AUTO_BALANCE]:
13823 # either we changed auto_balance to yes or it was from before
13824 mem_check_list.extend(instance.secondary_nodes)
13825 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13826 instance.hypervisor)
13827 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13828 [instance.hypervisor], False)
13829 pninfo = nodeinfo[pnode]
13830 msg = pninfo.fail_msg
13831 if msg:
13832 # Assume the primary node is unreachable and go ahead
13833 self.warn.append("Can't get info from primary node %s: %s" %
13834 (pnode, msg))
13835 else:
13836 (_, _, (pnhvinfo, )) = pninfo.payload
13837 if not isinstance(pnhvinfo.get("memory_free", None), int):
13838 self.warn.append("Node data from primary node %s doesn't contain"
13839 " free memory information" % pnode)
13840 elif instance_info.fail_msg:
13841 self.warn.append("Can't get instance runtime information: %s" %
13842 instance_info.fail_msg)
13843 else:
13844 if instance_info.payload:
13845 current_mem = int(instance_info.payload["memory"])
13846 else:
13847 # Assume instance not running
13848 # (there is a slight race condition here, but it's not very
13849 # probable, and we have no other way to check)
13850 # TODO: Describe race condition
13851 current_mem = 0
13852 #TODO(dynmem): do the appropriate check involving MINMEM
13853 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13854 pnhvinfo["memory_free"])
13856 raise errors.OpPrereqError("This change will prevent the instance"
13857 " from starting, due to %d MB of memory"
13858 " missing on its primary node" %
13859 miss_mem, errors.ECODE_NORES)
13861 if be_new[constants.BE_AUTO_BALANCE]:
13862 for node, nres in nodeinfo.items():
13863 if node not in instance.secondary_nodes:
13864 continue
13865 nres.Raise("Can't get info from secondary node %s" % node,
13866 prereq=True, ecode=errors.ECODE_STATE)
13867 (_, _, (nhvinfo, )) = nres.payload
13868 if not isinstance(nhvinfo.get("memory_free", None), int):
13869 raise errors.OpPrereqError("Secondary node %s didn't return free"
13870 " memory information" % node,
13871 errors.ECODE_STATE)
13872 #TODO(dynmem): do the appropriate check involving MINMEM
13873 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13874 raise errors.OpPrereqError("This change will prevent the instance"
13875 " from failover to its secondary node"
13876 " %s, due to not enough memory" % node,
13877 errors.ECODE_STATE)
13879 if self.op.runtime_mem:
13880 remote_info = self.rpc.call_instance_info(instance.primary_node,
13881 instance.name,
13882 instance.hypervisor)
13883 remote_info.Raise("Error checking node %s" % instance.primary_node)
13884 if not remote_info.payload: # not running already
13885 raise errors.OpPrereqError("Instance %s is not running" %
13886 instance.name, errors.ECODE_STATE)
13888 current_memory = remote_info.payload["memory"]
13889 if (not self.op.force and
13890 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13891 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13892 raise errors.OpPrereqError("Instance %s must have memory between %d"
13893 " and %d MB of memory unless --force is"
13894 " given" %
13895 (instance.name,
13896 self.be_proposed[constants.BE_MINMEM],
13897 self.be_proposed[constants.BE_MAXMEM]),
13898 errors.ECODE_INVAL)
13900 delta = self.op.runtime_mem - current_memory
13901 if delta > 0:
13902 _CheckNodeFreeMemory(self, instance.primary_node,
13903 "ballooning memory for instance %s" %
13904 instance.name, delta, instance.hypervisor)
13906 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13907 raise errors.OpPrereqError("Disk operations not supported for"
13908 " diskless instances", errors.ECODE_INVAL)
13910 def _PrepareNicCreate(_, params, private):
13911 self._PrepareNicModification(params, private, None, None,
13912 {}, cluster, pnode)
13913 return (None, None)
13915 def _PrepareNicMod(_, nic, params, private):
13916 self._PrepareNicModification(params, private, nic.ip, nic.network,
13917 nic.nicparams, cluster, pnode)
13918 return None
13920 def _PrepareNicRemove(_, params, __):
13921 ip = params.ip
13922 net = params.network
13923 if net is not None and ip is not None:
13924 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13926 # Verify NIC changes (operating on copy)
13927 nics = instance.nics[:]
13928 ApplyContainerMods("NIC", nics, None, self.nicmod,
13929 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13930 if len(nics) > constants.MAX_NICS:
13931 raise errors.OpPrereqError("Instance has too many network interfaces"
13932 " (%d), cannot add more" % constants.MAX_NICS,
13933 errors.ECODE_STATE)
13935 # Verify disk changes (operating on a copy)
13936 disks = instance.disks[:]
13937 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13938 if len(disks) > constants.MAX_DISKS:
13939 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13940 " more" % constants.MAX_DISKS,
13941 errors.ECODE_STATE)
13942 disk_sizes = [disk.size for disk in instance.disks]
13943 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13944 self.diskmod if op == constants.DDM_ADD)
13945 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13946 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13948 if self.op.offline is not None and self.op.offline:
13949 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13950 msg="can't change to offline")
13952 # Pre-compute NIC changes (necessary to use result in hooks)
13953 self._nic_chgdesc = []
13954 if self.nicmod:
13955 # Operate on copies as this is still in prereq
13956 nics = [nic.Copy() for nic in instance.nics]
13957 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13958 self._CreateNewNic, self._ApplyNicMods, None)
13959 self._new_nics = nics
13960 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13961 else:
13962 self._new_nics = None
13963 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13965 if not self.op.ignore_ipolicy:
13966 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13967 group_info)
13969 # Fill ispec with backend parameters
13970 ispec[constants.ISPEC_SPINDLE_USE] = \
13971 self.be_new.get(constants.BE_SPINDLE_USE, None)
13972 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13973 None)
13975 # Copy ispec to verify parameters with min/max values separately
13976 if self.op.disk_template:
13977 new_disk_template = self.op.disk_template
13978 else:
13979 new_disk_template = instance.disk_template
13980 ispec_max = ispec.copy()
13981 ispec_max[constants.ISPEC_MEM_SIZE] = \
13982 self.be_new.get(constants.BE_MAXMEM, None)
13983 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
13984 new_disk_template)
13985 ispec_min = ispec.copy()
13986 ispec_min[constants.ISPEC_MEM_SIZE] = \
13987 self.be_new.get(constants.BE_MINMEM, None)
13988 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
13989 new_disk_template)
13991 if (res_max or res_min):
13992 # FIXME: Improve error message by including information about whether
13993 # the upper or lower limit of the parameter fails the ipolicy.
13994 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13995 (group_info, group_info.name,
13996 utils.CommaJoin(set(res_max + res_min))))
13997 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13999 def _ConvertPlainToDrbd(self, feedback_fn):
14000 """Converts an instance from plain to drbd.
14002 """
14003 feedback_fn("Converting template to drbd")
14004 instance = self.instance
14005 pnode = instance.primary_node
14006 snode = self.op.remote_node
14008 assert instance.disk_template == constants.DT_PLAIN
14010 # create a fake disk info for _GenerateDiskTemplate
14011 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
14012 constants.IDISK_VG: d.logical_id[0]}
14013 for d in instance.disks]
14014 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
14015 instance.name, pnode, [snode],
14016 disk_info, None, None, 0, feedback_fn,
14017 self.diskparams)
14018 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
14019 self.diskparams)
14020 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
14021 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
14022 info = _GetInstanceInfoText(instance)
14023 feedback_fn("Creating additional volumes...")
14024 # first, create the missing data and meta devices
14025 for disk in anno_disks:
14026 # unfortunately this is... not too nice
14027 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
14028 info, True, p_excl_stor)
14029 for child in disk.children:
14030 _CreateSingleBlockDev(self, snode, instance, child, info, True,
14031 s_excl_stor)
14032 # at this stage, all new LVs have been created, we can rename the
14033 # old ones
14034 feedback_fn("Renaming original volumes...")
14035 rename_list = [(o, n.children[0].logical_id)
14036 for (o, n) in zip(instance.disks, new_disks)]
14037 result = self.rpc.call_blockdev_rename(pnode, rename_list)
14038 result.Raise("Failed to rename original LVs")
14040 feedback_fn("Initializing DRBD devices...")
14041 # all child devices are in place, we can now create the DRBD devices
14042 for disk in anno_disks:
14043 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
14044 f_create = node == pnode
14045 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
14046 excl_stor)
14048 # at this point, the instance has been modified
14049 instance.disk_template = constants.DT_DRBD8
14050 instance.disks = new_disks
14051 self.cfg.Update(instance, feedback_fn)
14053 # Release node locks while waiting for sync
14054 _ReleaseLocks(self, locking.LEVEL_NODE)
14056 # disks are created, waiting for sync
14057 disk_abort = not _WaitForSync(self, instance,
14058 oneshot=not self.op.wait_for_sync)
14060 raise errors.OpExecError("There are some degraded disks for"
14061 " this instance, please cleanup manually")
14063 # Node resource locks will be released by caller
14065 def _ConvertDrbdToPlain(self, feedback_fn):
14066 """Converts an instance from drbd to plain.
14068 """
14069 instance = self.instance
14071 assert len(instance.secondary_nodes) == 1
14072 assert instance.disk_template == constants.DT_DRBD8
14074 pnode = instance.primary_node
14075 snode = instance.secondary_nodes[0]
14076 feedback_fn("Converting template to plain")
14078 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
14079 new_disks = [d.children[0] for d in instance.disks]
14081 # copy over size and mode
14082 for parent, child in zip(old_disks, new_disks):
14083 child.size = parent.size
14084 child.mode = parent.mode
14086 # this is a DRBD disk, return its port to the pool
14087 # NOTE: this must be done right before the call to cfg.Update!
14088 for disk in old_disks:
14089 tcp_port = disk.logical_id[2]
14090 self.cfg.AddTcpUdpPort(tcp_port)
14092 # update instance structure
14093 instance.disks = new_disks
14094 instance.disk_template = constants.DT_PLAIN
14095 self.cfg.Update(instance, feedback_fn)
14097 # Release locks in case removing disks takes a while
14098 _ReleaseLocks(self, locking.LEVEL_NODE)
14100 feedback_fn("Removing volumes on the secondary node...")
14101 for disk in old_disks:
14102 self.cfg.SetDiskID(disk, snode)
14103 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14104 if msg:
14105 self.LogWarning("Could not remove block device %s on node %s,"
14106 " continuing anyway: %s", disk.iv_name, snode, msg)
14108 feedback_fn("Removing unneeded volumes on the primary node...")
14109 for idx, disk in enumerate(old_disks):
14110 meta = disk.children[1]
14111 self.cfg.SetDiskID(meta, pnode)
14112 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14113 if msg:
14114 self.LogWarning("Could not remove metadata for disk %d on node %s,"
14115 " continuing anyway: %s", idx, pnode, msg)
14117 def _CreateNewDisk(self, idx, params, _):
14118 """Creates a new disk.
14120 """
14121 instance = self.instance
14123 # add a new disk
14124 if instance.disk_template in constants.DTS_FILEBASED:
14125 (file_driver, file_path) = instance.disks[0].logical_id
14126 file_path = os.path.dirname(file_path)
14127 else:
14128 file_driver = file_path = None
14130 disk = \
14131 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14132 instance.primary_node, instance.secondary_nodes,
14133 [params], file_path, file_driver, idx,
14134 self.Log, self.diskparams)[0]
14136 info = _GetInstanceInfoText(instance)
14138 logging.info("Creating volume %s for instance %s",
14139 disk.iv_name, instance.name)
14140 # Note: this needs to be kept in sync with _CreateDisks
14142 for node in instance.all_nodes:
14143 f_create = (node == instance.primary_node)
14144 try:
14145 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14146 except errors.OpExecError, err:
14147 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14148 disk.iv_name, disk, node, err)
14150 if self.cluster.prealloc_wipe_disks:
14151 # Wipe new disk
14152 _WipeDisks(self, instance,
14153 disks=[(idx, disk, 0)])
14156 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14159 @staticmethod
14160 def _ModifyDisk(idx, disk, params, _):
14161 """Modifies a disk.
14163 """
14164 disk.mode = params[constants.IDISK_MODE]
14166 return [
14167 ("disk.mode/%d" % idx, disk.mode),
14168 ]
14170 def _RemoveDisk(self, idx, root, _):
14171 """Removes a disk.
14173 """
14174 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14175 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14176 self.cfg.SetDiskID(disk, node)
14177 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14178 if msg:
14179 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14180 " continuing anyway", idx, node, msg)
14182 # if this is a DRBD disk, return its port to the pool
14183 if root.dev_type in constants.LDS_DRBD:
14184 self.cfg.AddTcpUdpPort(root.logical_id[2])
14186 def _CreateNewNic(self, idx, params, private):
14187 """Creates data structure for a new network interface.
14189 """
14190 mac = params[constants.INIC_MAC]
14191 ip = params.get(constants.INIC_IP, None)
14192 net = params.get(constants.INIC_NETWORK, None)
14193 net_uuid = self.cfg.LookupNetwork(net)
14194 #TODO: not private.filled?? can a nic have no nicparams??
14195 nicparams = private.filled
14196 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
14200 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14201 (mac, ip, private.filled[constants.NIC_MODE],
14202 private.filled[constants.NIC_LINK],
14203 net)),
14204 ])
14206 def _ApplyNicMods(self, idx, nic, params, private):
14207 """Modifies a network interface.
14209 """
14210 changes = []
14212 for key in [constants.INIC_MAC, constants.INIC_IP]:
14213 if key in params:
14214 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14215 setattr(nic, key, params[key])
14217 new_net = params.get(constants.INIC_NETWORK, nic.network)
14218 new_net_uuid = self.cfg.LookupNetwork(new_net)
14219 if new_net_uuid != nic.network:
14220 changes.append(("nic.network/%d" % idx, new_net))
14221 nic.network = new_net_uuid
14223 if private.filled:
14224 nic.nicparams = private.filled
14226 for (key, val) in nic.nicparams.items():
14227 changes.append(("nic.%s/%d" % (key, idx), val))
14229 return changes
14231 def Exec(self, feedback_fn):
14232 """Modifies an instance.
14234 All parameters take effect only at the next restart of the instance.
14236 """
14237 # Process here the warnings from CheckPrereq, as we don't have a
14238 # feedback_fn there.
14239 # TODO: Replace with self.LogWarning
14240 for warn in self.warn:
14241 feedback_fn("WARNING: %s" % warn)
14243 assert ((self.op.disk_template is None) ^
14244 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14245 "Not owning any node resource locks"
14247 result = []
14248 instance = self.instance
14250 # New primary node
14251 if self.op.pnode:
14252 instance.primary_node = self.op.pnode
14254 # runtime memory
14255 if self.op.runtime_mem:
14256 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14257 instance.name,
14258 self.op.runtime_mem)
14259 rpcres.Raise("Cannot modify instance runtime memory")
14260 result.append(("runtime_memory", self.op.runtime_mem))
14262 # Apply disk changes
14263 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14264 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14265 _UpdateIvNames(0, instance.disks)
14267 if self.op.disk_template:
14268 if __debug__:
14269 check_nodes = set(instance.all_nodes)
14270 if self.op.remote_node:
14271 check_nodes.add(self.op.remote_node)
14272 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14273 owned = self.owned_locks(level)
14274 assert not (check_nodes - owned), \
14275 ("Not owning the correct locks, owning %r, expected at least %r" %
14276 (owned, check_nodes))
14278 r_shut = _ShutdownInstanceDisks(self, instance)
14280 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14281 " proceed with disk template conversion")
14282 mode = (instance.disk_template, self.op.disk_template)
14283 try:
14284 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14285 except:
14286 self.cfg.ReleaseDRBDMinors(instance.name)
14287 raise
14288 result.append(("disk_template", self.op.disk_template))
14290 assert instance.disk_template == self.op.disk_template, \
14291 ("Expected disk template '%s', found '%s'" %
14292 (self.op.disk_template, instance.disk_template))
14294 # Release node and resource locks if there are any (they might already have
14295 # been released during disk conversion)
14296 _ReleaseLocks(self, locking.LEVEL_NODE)
14297 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14299 # Apply NIC changes
14300 if self._new_nics is not None:
14301 instance.nics = self._new_nics
14302 result.extend(self._nic_chgdesc)
14304 # hvparams changes
14305 if self.op.hvparams:
14306 instance.hvparams = self.hv_inst
14307 for key, val in self.op.hvparams.iteritems():
14308 result.append(("hv/%s" % key, val))
14310 # beparams changes
14311 if self.op.beparams:
14312 instance.beparams = self.be_inst
14313 for key, val in self.op.beparams.iteritems():
14314 result.append(("be/%s" % key, val))
14316 # OS change
14317 if self.op.os_name:
14318 instance.os = self.op.os_name
14320 # osparams changes
14321 if self.op.osparams:
14322 instance.osparams = self.os_inst
14323 for key, val in self.op.osparams.iteritems():
14324 result.append(("os/%s" % key, val))
14326 if self.op.offline is None:
14327 # Ignore
14328 pass
14329 elif self.op.offline:
14330 # Mark instance as offline
14331 self.cfg.MarkInstanceOffline(instance.name)
14332 result.append(("admin_state", constants.ADMINST_OFFLINE))
14333 else:
14334 # Mark instance as online, but stopped
14335 self.cfg.MarkInstanceDown(instance.name)
14336 result.append(("admin_state", constants.ADMINST_DOWN))
14338 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14340 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14341 self.owned_locks(locking.LEVEL_NODE)), \
14342 "All node locks should have been released by now"
14346 _DISK_CONVERSIONS = {
14347 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14348 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14349 }
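# Usage note (illustrative): Exec dispatches on the (old, new) template pair,
# e.g. mode = (constants.DT_PLAIN, constants.DT_DRBD8) selects
# _ConvertPlainToDrbd; pairs not listed here are rejected earlier in
# _PreCheckDiskTemplate.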
14352 class LUInstanceChangeGroup(LogicalUnit):
14353 HPATH = "instance-change-group"
14354 HTYPE = constants.HTYPE_INSTANCE
14355 REQ_BGL = False
14357 def ExpandNames(self):
14358 self.share_locks = _ShareAll()
14360 self.needed_locks = {
14361 locking.LEVEL_NODEGROUP: [],
14362 locking.LEVEL_NODE: [],
14363 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14364 }
14366 self._ExpandAndLockInstance()
14368 if self.op.target_groups:
14369 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14370 self.op.target_groups)
14371 else:
14372 self.req_target_uuids = None
14374 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14376 def DeclareLocks(self, level):
14377 if level == locking.LEVEL_NODEGROUP:
14378 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14380 if self.req_target_uuids:
14381 lock_groups = set(self.req_target_uuids)
14383 # Lock all groups used by instance optimistically; this requires going
14384 # via the node before it's locked, requiring verification later on
14385 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14386 lock_groups.update(instance_groups)
14387 else:
14388 # No target groups, need to lock all of them
14389 lock_groups = locking.ALL_SET
14391 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14393 elif level == locking.LEVEL_NODE:
14394 if self.req_target_uuids:
14395 # Lock all nodes used by instances
14396 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14397 self._LockInstancesNodes()
14399 # Lock all nodes in all potential target groups
14400 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14401 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14402 member_nodes = [node_name
14403 for group in lock_groups
14404 for node_name in self.cfg.GetNodeGroup(group).members]
14405 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14406 else:
14407 # Lock all nodes as all groups are potential targets
14408 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14410 def CheckPrereq(self):
14411 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14412 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14413 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14415 assert (self.req_target_uuids is None or
14416 owned_groups.issuperset(self.req_target_uuids))
14417 assert owned_instances == set([self.op.instance_name])
14419 # Get instance information
14420 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14422 # Check if node groups for locked instance are still correct
14423 assert owned_nodes.issuperset(self.instance.all_nodes), \
14424 ("Instance %s's nodes changed while we kept the lock" %
14425 self.op.instance_name)
14427 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14428 owned_groups)
14430 if self.req_target_uuids:
14431 # User requested specific target groups
14432 self.target_uuids = frozenset(self.req_target_uuids)
14433 else:
14434 # All groups except those used by the instance are potential targets
14435 self.target_uuids = owned_groups - inst_groups
14437 conflicting_groups = self.target_uuids & inst_groups
14438 if conflicting_groups:
14439 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14440 " used by the instance '%s'" %
14441 (utils.CommaJoin(conflicting_groups),
14442 self.op.instance_name),
14443 errors.ECODE_INVAL)
14445 if not self.target_uuids:
14446 raise errors.OpPrereqError("There are no possible target groups",
14447 errors.ECODE_INVAL)
14449 def BuildHooksEnv(self):
14450 """Build hooks env.
14453 assert self.target_uuids
14456 "TARGET_GROUPS": " ".join(self.target_uuids),
14459 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14463 def BuildHooksNodes(self):
14464 """Build hooks nodes.
14467 mn = self.cfg.GetMasterNode()
14468 return ([mn], [mn])
14470 def Exec(self, feedback_fn):
14471 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14473 assert instances == [self.op.instance_name], "Instance not locked"
14475 req = iallocator.IAReqGroupChange(instances=instances,
14476 target_groups=list(self.target_uuids))
14477 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14479 ial.Run(self.op.iallocator)
14481 if not ial.success:
14482 raise errors.OpPrereqError("Can't compute solution for changing group of"
14483 " instance '%s' using iallocator '%s': %s" %
14484 (self.op.instance_name, self.op.iallocator,
14485 ial.info), errors.ECODE_NORES)
14487 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14489 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14490 " instance '%s'", len(jobs), self.op.instance_name)
14492 return ResultWithJobs(jobs)
14495 class LUBackupQuery(NoHooksLU):
14496 """Query the exports list
14498 """
14499 REQ_BGL = False
14501 def CheckArguments(self):
14502 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14503 ["node", "export"], self.op.use_locking)
14505 def ExpandNames(self):
14506 self.expq.ExpandNames(self)
14508 def DeclareLocks(self, level):
14509 self.expq.DeclareLocks(self, level)
14511 def Exec(self, feedback_fn):
14512 result = {}
14514 for (node, expname) in self.expq.OldStyleQuery(self):
14515 if expname is None:
14516 result[node] = False
14517 else:
14518 result.setdefault(node, []).append(expname)
14520 return result
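# Illustrative return value (example data, not from the source): with one
# export on node1 and a failed export query on node2, Exec would return
#   {"node1": ["inst1.example.com"], "node2": False}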
14523 class _ExportQuery(_QueryBase):
14524 FIELDS = query.EXPORT_FIELDS
14526 #: The node name is not a unique key for this query
14527 SORT_FIELD = "node"
14529 def ExpandNames(self, lu):
14530 lu.needed_locks = {}
14532 # The following variables interact with _QueryBase._GetNames
14533 if self.names:
14534 self.wanted = _GetWantedNodes(lu, self.names)
14535 else:
14536 self.wanted = locking.ALL_SET
14538 self.do_locking = self.use_locking
14540 if self.do_locking:
14541 lu.share_locks = _ShareAll()
14542 lu.needed_locks = {
14543 locking.LEVEL_NODE: self.wanted,
14544 }
14546 if not self.names:
14547 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14549 def DeclareLocks(self, lu, level):
14550 pass
14552 def _GetQueryData(self, lu):
14553 """Computes the list of nodes and their attributes.
14555 """
14556 # Locking is not used
14558 assert not (compat.any(lu.glm.is_owned(level)
14559 for level in locking.LEVELS
14560 if level != locking.LEVEL_CLUSTER) or
14561 self.do_locking or self.use_locking)
14563 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14565 result = []
14567 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14568 if nres.fail_msg:
14569 result.append((node, None))
14570 else:
14571 result.extend((node, expname) for expname in nres.payload)
14573 return result
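# For reference, _GetQueryData yields (node, export) pairs, with None marking
# a node whose export list could not be retrieved, e.g. (illustrative only):
#   [("node1", "inst1.example.com"), ("node2", None)]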
14576 class LUBackupPrepare(NoHooksLU):
14577 """Prepares an instance for an export and returns useful information.
14579 """
14580 REQ_BGL = False
14582 def ExpandNames(self):
14583 self._ExpandAndLockInstance()
14585 def CheckPrereq(self):
14586 """Check prerequisites.
14589 instance_name = self.op.instance_name
14591 self.instance = self.cfg.GetInstanceInfo(instance_name)
14592 assert self.instance is not None, \
14593 "Cannot retrieve locked instance %s" % self.op.instance_name
14594 _CheckNodeOnline(self, self.instance.primary_node)
14596 self._cds = _GetClusterDomainSecret()
14598 def Exec(self, feedback_fn):
14599 """Prepares an instance for an export.
14602 instance = self.instance
14604 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14605 salt = utils.GenerateSecret(8)
14607 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14608 result = self.rpc.call_x509_cert_create(instance.primary_node,
14609 constants.RIE_CERT_VALIDITY)
14610 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14612 (name, cert_pem) = result.payload
14614 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14615 cert_pem)
14617 return {
14618 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14619 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14620 salt),
14621 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14622 }
14624 return None
14627 class LUBackupExport(LogicalUnit):
14628 """Export an instance to an image in the cluster.
14630 """
14631 HPATH = "instance-export"
14632 HTYPE = constants.HTYPE_INSTANCE
14633 REQ_BGL = False
14635 def CheckArguments(self):
14636 """Check the arguments.
14639 self.x509_key_name = self.op.x509_key_name
14640 self.dest_x509_ca_pem = self.op.destination_x509_ca
14642 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14643 if not self.x509_key_name:
14644 raise errors.OpPrereqError("Missing X509 key name for encryption",
14645 errors.ECODE_INVAL)
14647 if not self.dest_x509_ca_pem:
14648 raise errors.OpPrereqError("Missing destination X509 CA",
14649 errors.ECODE_INVAL)
14651 def ExpandNames(self):
14652 self._ExpandAndLockInstance()
14654 # Lock all nodes for local exports
14655 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14656 # FIXME: lock only instance primary and destination node
14658 # Sad but true, for now we have to lock all nodes, as we don't know where
14659 # the previous export might be, and in this LU we search for it and
14660 # remove it from its current node. In the future we could fix this by:
14661 # - making a tasklet to search (share-lock all), then create the
14662 # new one, then one to remove, after
14663 # - removing the removal operation altogether
14664 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14666 # Allocations should be stopped while this LU runs with node locks, but
14667 # it doesn't have to be exclusive
14668 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14669 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14671 def DeclareLocks(self, level):
14672 """Last minute lock declaration."""
14673 # All nodes are locked anyway, so nothing to do here.
14675 def BuildHooksEnv(self):
14676 """Build hooks env.
14678 This will run on the master, primary node and target node.
14682 "EXPORT_MODE": self.op.mode,
14683 "EXPORT_NODE": self.op.target_node,
14684 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14685 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14686 # TODO: Generic function for boolean env variables
14687 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14690 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14694 def BuildHooksNodes(self):
14695 """Build hooks nodes.
14698 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14700 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14701 nl.append(self.op.target_node)
14703 return (nl, nl)
14705 def CheckPrereq(self):
14706 """Check prerequisites.
14708 This checks that the instance and node names are valid.
14710 """
14711 instance_name = self.op.instance_name
14713 self.instance = self.cfg.GetInstanceInfo(instance_name)
14714 assert self.instance is not None, \
14715 "Cannot retrieve locked instance %s" % self.op.instance_name
14716 _CheckNodeOnline(self, self.instance.primary_node)
14718 if (self.op.remove_instance and
14719 self.instance.admin_state == constants.ADMINST_UP and
14720 not self.op.shutdown):
14721 raise errors.OpPrereqError("Can not remove instance without shutting it"
14722 " down before", errors.ECODE_STATE)
14724 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14725 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14726 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14727 assert self.dst_node is not None
14729 _CheckNodeOnline(self, self.dst_node.name)
14730 _CheckNodeNotDrained(self, self.dst_node.name)
14732 self._cds = None
14733 self.dest_disk_info = None
14734 self.dest_x509_ca = None
14736 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14737 self.dst_node = None
14739 if len(self.op.target_node) != len(self.instance.disks):
14740 raise errors.OpPrereqError(("Received destination information for %s"
14741 " disks, but instance %s has %s disks") %
14742 (len(self.op.target_node), instance_name,
14743 len(self.instance.disks)),
14744 errors.ECODE_INVAL)
14746 cds = _GetClusterDomainSecret()
14748 # Check X509 key name
14749 try:
14750 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14751 except (TypeError, ValueError), err:
14752 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14753 errors.ECODE_INVAL)
14755 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14756 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14757 errors.ECODE_INVAL)
14759 # Load and verify CA
14760 try:
14761 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14762 except OpenSSL.crypto.Error, err:
14763 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14764 (err, ), errors.ECODE_INVAL)
14766 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14767 if errcode is not None:
14768 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14769 (msg, ), errors.ECODE_INVAL)
14771 self.dest_x509_ca = cert
14773 # Verify target information
14774 disk_info = []
14775 for idx, disk_data in enumerate(self.op.target_node):
14776 try:
14777 (host, port, magic) = \
14778 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14779 except errors.GenericError, err:
14780 raise errors.OpPrereqError("Target info for disk %s: %s" %
14781 (idx, err), errors.ECODE_INVAL)
14783 disk_info.append((host, port, magic))
14785 assert len(disk_info) == len(self.op.target_node)
14786 self.dest_disk_info = disk_info
14788 else:
14789 raise errors.ProgrammerError("Unhandled export mode %r" %
14790 self.op.mode)
14792 # instance disk type verification
14793 # TODO: Implement export support for file-based disks
14794 for disk in self.instance.disks:
14795 if disk.dev_type == constants.LD_FILE:
14796 raise errors.OpPrereqError("Export not supported for instances with"
14797 " file-based disks", errors.ECODE_INVAL)
14799 def _CleanupExports(self, feedback_fn):
14800 """Removes exports of current instance from all other nodes.
14802 If an instance in a cluster with nodes A..D was exported to node C, its
14803 exports will be removed from the nodes A, B and D.
14806 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14808 nodelist = self.cfg.GetNodeList()
14809 nodelist.remove(self.dst_node.name)
14811 # on one-node clusters nodelist will be empty after the removal
14812 # if we proceed the backup would be removed because OpBackupQuery
14813 # substitutes an empty list with the full cluster node list.
14814 iname = self.instance.name
14816 feedback_fn("Removing old exports for instance %s" % iname)
14817 exportlist = self.rpc.call_export_list(nodelist)
14818 for node in exportlist:
14819 if exportlist[node].fail_msg:
14820 continue
14821 if iname in exportlist[node].payload:
14822 msg = self.rpc.call_export_remove(node, iname).fail_msg
14823 if msg:
14824 self.LogWarning("Could not remove older export for instance %s"
14825 " on node %s: %s", iname, node, msg)
14827 def Exec(self, feedback_fn):
14828 """Export an instance to an image in the cluster.
14831 assert self.op.mode in constants.EXPORT_MODES
14833 instance = self.instance
14834 src_node = instance.primary_node
14836 if self.op.shutdown:
14837 # shutdown the instance, but not the disks
14838 feedback_fn("Shutting down instance %s" % instance.name)
14839 result = self.rpc.call_instance_shutdown(src_node, instance,
14840 self.op.shutdown_timeout)
14841 # TODO: Maybe ignore failures if ignore_remove_failures is set
14842 result.Raise("Could not shutdown instance %s on"
14843 " node %s" % (instance.name, src_node))
14845 # set the disks ID correctly since call_instance_start needs the
14846 # correct drbd minor to create the symlinks
14847 for disk in instance.disks:
14848 self.cfg.SetDiskID(disk, src_node)
14850 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14852 if activate_disks:
14853 # Activate the instance disks if we're exporting a stopped instance
14854 feedback_fn("Activating disks for %s" % instance.name)
14855 _StartInstanceDisks(self, instance, None)
14857 try:
14858 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14859 instance)
14861 helper.CreateSnapshots()
14862 try:
14863 if (self.op.shutdown and
14864 instance.admin_state == constants.ADMINST_UP and
14865 not self.op.remove_instance):
14866 assert not activate_disks
14867 feedback_fn("Starting instance %s" % instance.name)
14868 result = self.rpc.call_instance_start(src_node,
14869 (instance, None, None), False)
14870 msg = result.fail_msg
14871 if msg:
14872 feedback_fn("Failed to start instance: %s" % msg)
14873 _ShutdownInstanceDisks(self, instance)
14874 raise errors.OpExecError("Could not start instance: %s" % msg)
14876 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14877 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14878 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14879 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14880 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14882 (key_name, _, _) = self.x509_key_name
14884 dest_ca_pem = \
14885 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14886 self.dest_x509_ca)
14888 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14889 key_name, dest_ca_pem,
14890 timeouts)
14891 finally:
14892 helper.Cleanup()
14894 # Check for backwards compatibility
14895 assert len(dresults) == len(instance.disks)
14896 assert compat.all(isinstance(i, bool) for i in dresults), \
14897 "Not all results are boolean: %r" % dresults
14901 feedback_fn("Deactivating disks for %s" % instance.name)
14902 _ShutdownInstanceDisks(self, instance)
14904 if not (compat.all(dresults) and fin_resu):
14905 failures = []
14906 if not fin_resu:
14907 failures.append("export finalization")
14908 if not compat.all(dresults):
14909 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14910 if not dsk)
14911 failures.append("disk export: disk(s) %s" % fdsk)
14913 raise errors.OpExecError("Export failed, errors in %s" %
14914 utils.CommaJoin(failures))
14916 # At this point, the export was successful, we can cleanup/finish
14918 # Remove instance if requested
14919 if self.op.remove_instance:
14920 feedback_fn("Removing instance %s" % instance.name)
14921 _RemoveInstance(self, feedback_fn, instance,
14922 self.op.ignore_remove_failures)
14924 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14925 self._CleanupExports(feedback_fn)
14927 return fin_resu, dresults
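# Illustrative return value (example data): a fully successful export of a
# two-disk instance yields (True, [True, True]); the booleans in "dresults"
# follow the order of instance.disks.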
14930 class LUBackupRemove(NoHooksLU):
14931 """Remove exports related to the named instance.
14933 """
14934 REQ_BGL = False
14936 def ExpandNames(self):
14937 self.needed_locks = {
14938 # We need all nodes to be locked in order for RemoveExport to work, but
14939 # we don't need to lock the instance itself, as nothing will happen to it
14940 # (and we can remove exports also for a removed instance)
14941 locking.LEVEL_NODE: locking.ALL_SET,
14943 # Removing backups is quick, so blocking allocations is justified
14944 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14945 }
14947 # Allocations should be stopped while this LU runs with node locks, but it
14948 # doesn't have to be exclusive
14949 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14951 def Exec(self, feedback_fn):
14952 """Remove any export.
14955 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14956 # If the instance was not found we'll try with the name that was passed in.
14957 # This will only work if it was an FQDN, though.
14959 if not instance_name:
14961 instance_name = self.op.instance_name
14963 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14964 exportlist = self.rpc.call_export_list(locked_nodes)
14965 found = False
14966 for node in exportlist:
14967 msg = exportlist[node].fail_msg
14968 if msg:
14969 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14970 continue
14971 if instance_name in exportlist[node].payload:
14972 found = True
14973 result = self.rpc.call_export_remove(node, instance_name)
14974 msg = result.fail_msg
14975 if msg:
14976 logging.error("Could not remove export for instance %s"
14977 " on node %s: %s", instance_name, node, msg)
14979 if fqdn_warn and not found:
14980 feedback_fn("Export not found. If trying to remove an export belonging"
14981 " to a deleted instance please use its Fully Qualified"
14985 class LUGroupAdd(LogicalUnit):
14986 """Logical unit for creating node groups.
14988 """
14989 HPATH = "group-add"
14990 HTYPE = constants.HTYPE_GROUP
14991 REQ_BGL = False
14993 def ExpandNames(self):
14994 # We need the new group's UUID here so that we can create and acquire the
14995 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14996 # that it should not check whether the UUID exists in the configuration.
14997 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14998 self.needed_locks = {}
14999 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15001 def CheckPrereq(self):
15002 """Check prerequisites.
15004 This checks that the given group name is not an existing node group
15009 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15010 except errors.OpPrereqError:
15013 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
15014 " node group (UUID: %s)" %
15015 (self.op.group_name, existing_uuid),
15016 errors.ECODE_EXISTS)
15018 if self.op.ndparams:
15019 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
15021 if self.op.hv_state:
15022 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
15023 else:
15024 self.new_hv_state = None
15026 if self.op.disk_state:
15027 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
15028 else:
15029 self.new_disk_state = None
15031 if self.op.diskparams:
15032 for templ in constants.DISK_TEMPLATES:
15033 if templ in self.op.diskparams:
15034 utils.ForceDictType(self.op.diskparams[templ],
15035 constants.DISK_DT_TYPES)
15036 self.new_diskparams = self.op.diskparams
15037 try:
15038 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15039 except errors.OpPrereqError, err:
15040 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
15041 errors.ECODE_INVAL)
15042 else:
15043 self.new_diskparams = {}
15045 if self.op.ipolicy:
15046 cluster = self.cfg.GetClusterInfo()
15047 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
15048 try:
15049 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
15050 except errors.ConfigurationError, err:
15051 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
15052 errors.ECODE_INVAL)
15054 def BuildHooksEnv(self):
15055 """Build hooks env.
15059 "GROUP_NAME": self.op.group_name,
15062 def BuildHooksNodes(self):
15063 """Build hooks nodes.
15066 mn = self.cfg.GetMasterNode()
15067 return ([mn], [mn])
15069 def Exec(self, feedback_fn):
15070 """Add the node group to the cluster.
15073 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
15074 uuid=self.group_uuid,
15075 alloc_policy=self.op.alloc_policy,
15076 ndparams=self.op.ndparams,
15077 diskparams=self.new_diskparams,
15078 ipolicy=self.op.ipolicy,
15079 hv_state_static=self.new_hv_state,
15080 disk_state_static=self.new_disk_state)
15082 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
15083 del self.remove_locks[locking.LEVEL_NODEGROUP]
15086 class LUGroupAssignNodes(NoHooksLU):
15087 """Logical unit for assigning nodes to groups.
15089 """
15090 REQ_BGL = False
15092 def ExpandNames(self):
15093 # These raise errors.OpPrereqError on their own:
15094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15095 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15097 # We want to lock all the affected nodes and groups. We have readily
15098 # available the list of nodes, and the *destination* group. To gather the
15099 # list of "source" groups, we need to fetch node information later on.
15100 self.needed_locks = {
15101 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15102 locking.LEVEL_NODE: self.op.nodes,
15103 }
15105 def DeclareLocks(self, level):
15106 if level == locking.LEVEL_NODEGROUP:
15107 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15109 # Try to get all affected nodes' groups without having the group or node
15110 # lock yet. Needs verification later in the code flow.
15111 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15113 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15115 def CheckPrereq(self):
15116 """Check prerequisites.
15119 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15120 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15121 frozenset(self.op.nodes))
15123 expected_locks = (set([self.group_uuid]) |
15124 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15125 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15126 if actual_locks != expected_locks:
15127 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15128 " current groups are '%s', used to be '%s'" %
15129 (utils.CommaJoin(expected_locks),
15130 utils.CommaJoin(actual_locks)))
15132 self.node_data = self.cfg.GetAllNodesInfo()
15133 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15134 instance_data = self.cfg.GetAllInstancesInfo()
15136 if self.group is None:
15137 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15138 (self.op.group_name, self.group_uuid))
15140 (new_splits, previous_splits) = \
15141 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15142 for node in self.op.nodes],
15143 self.node_data, instance_data)
15145 if new_splits:
15146 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15148 if not self.op.force:
15149 raise errors.OpExecError("The following instances get split by this"
15150 " change and --force was not given: %s" %
15151 fmt_new_splits)
15152 else:
15153 self.LogWarning("This operation will split the following instances: %s",
15154 fmt_new_splits)
15156 if previous_splits:
15157 self.LogWarning("In addition, these already-split instances continue"
15158 " to be split across groups: %s",
15159 utils.CommaJoin(utils.NiceSort(previous_splits)))
15161 def Exec(self, feedback_fn):
15162 """Assign nodes to a new group.
15165 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15167 self.cfg.AssignGroupNodes(mods)
15170 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15171 """Check for split instances after a node assignment.
15173 This method considers a series of node assignments as an atomic operation,
15174 and returns information about split instances after applying the set of
15177 In particular, it returns information about newly split instances, and
15178 instances that were already split, and remain so after the change.
15180 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15183 @type changes: list of (node_name, new_group_uuid) pairs.
15184 @param changes: list of node assignments to consider.
15185 @param node_data: a dict with data for all nodes
15186 @param instance_data: a dict with all instances to consider
15187 @rtype: a two-tuple
15188 @return: a list of instances that were previously okay and result split as a
15189 consequence of this change, and a list of instances that were previously
15190 split and this change does not fix.
15193 changed_nodes = dict((node, group) for node, group in changes
15194 if node_data[node].group != group)
15196 all_split_instances = set()
15197 previously_split_instances = set()
15199 def InstanceNodes(instance):
15200 return [instance.primary_node] + list(instance.secondary_nodes)
15202 for inst in instance_data.values():
15203 if inst.disk_template not in constants.DTS_INT_MIRROR:
15204 continue
15206 instance_nodes = InstanceNodes(inst)
15208 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15209 previously_split_instances.add(inst.name)
15211 if len(set(changed_nodes.get(node, node_data[node].group)
15212 for node in instance_nodes)) > 1:
15213 all_split_instances.add(inst.name)
15215 return (list(all_split_instances - previously_split_instances),
15216 list(previously_split_instances & all_split_instances))
15219 class _GroupQuery(_QueryBase):
15220 FIELDS = query.GROUP_FIELDS
15222 def ExpandNames(self, lu):
15223 lu.needed_locks = {}
15225 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15226 self._cluster = lu.cfg.GetClusterInfo()
15227 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15229 if not self.names:
15230 self.wanted = [name_to_uuid[name]
15231 for name in utils.NiceSort(name_to_uuid.keys())]
15232 else:
15233 # Accept names to be either names or UUIDs.
15234 missing = []
15235 self.wanted = []
15236 all_uuid = frozenset(self._all_groups.keys())
15238 for name in self.names:
15239 if name in all_uuid:
15240 self.wanted.append(name)
15241 elif name in name_to_uuid:
15242 self.wanted.append(name_to_uuid[name])
15243 else:
15244 missing.append(name)
15246 if missing:
15247 raise errors.OpPrereqError("Some groups do not exist: %s" %
15248 utils.CommaJoin(missing),
15249 errors.ECODE_NOENT)
15251 def DeclareLocks(self, lu, level):
15252 pass
15254 def _GetQueryData(self, lu):
15255 """Computes the list of node groups and their attributes.
15257 """
15258 do_nodes = query.GQ_NODE in self.requested_data
15259 do_instances = query.GQ_INST in self.requested_data
15261 group_to_nodes = None
15262 group_to_instances = None
15264 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15265 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15266 # latter GetAllInstancesInfo() is not enough, for we have to go through
15267 # instance->node. Hence, we will need to process nodes even if we only need
15268 # instance information.
15269 if do_nodes or do_instances:
15270 all_nodes = lu.cfg.GetAllNodesInfo()
15271 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15272 node_to_group = {}
15274 for node in all_nodes.values():
15275 if node.group in group_to_nodes:
15276 group_to_nodes[node.group].append(node.name)
15277 node_to_group[node.name] = node.group
15279 if do_instances:
15280 all_instances = lu.cfg.GetAllInstancesInfo()
15281 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15283 for instance in all_instances.values():
15284 node = instance.primary_node
15285 if node in node_to_group:
15286 group_to_instances[node_to_group[node]].append(instance.name)
15288 if not do_nodes:
15289 # Do not pass on node information if it was not requested.
15290 group_to_nodes = None
15292 return query.GroupQueryData(self._cluster,
15293 [self._all_groups[uuid]
15294 for uuid in self.wanted],
15295 group_to_nodes, group_to_instances,
15296 query.GQ_DISKPARAMS in self.requested_data)
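# Illustrative shape of the computed mappings (example data):
#   group_to_nodes = {"uuid-1": ["node1", "node2"]}
#   group_to_instances = {"uuid-1": ["inst1.example.com"]}
# Either mapping stays None when the corresponding field set (GQ_NODE or
# GQ_INST) was not requested.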
15299 class LUGroupQuery(NoHooksLU):
15300 """Logical unit for querying node groups.
15302 """
15303 REQ_BGL = False
15305 def CheckArguments(self):
15306 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15307 self.op.output_fields, False)
15309 def ExpandNames(self):
15310 self.gq.ExpandNames(self)
15312 def DeclareLocks(self, level):
15313 self.gq.DeclareLocks(self, level)
15315 def Exec(self, feedback_fn):
15316 return self.gq.OldStyleQuery(self)
15319 class LUGroupSetParams(LogicalUnit):
15320 """Modifies the parameters of a node group.
15322 """
15323 HPATH = "group-modify"
15324 HTYPE = constants.HTYPE_GROUP
15325 REQ_BGL = False
15327 def CheckArguments(self):
15328 all_changes = [
15329 self.op.ndparams,
15330 self.op.diskparams,
15331 self.op.alloc_policy,
15332 self.op.hv_state,
15333 self.op.disk_state,
15334 self.op.ipolicy,
15335 ]
15337 if all_changes.count(None) == len(all_changes):
15338 raise errors.OpPrereqError("Please pass at least one modification",
15339 errors.ECODE_INVAL)
15341 def ExpandNames(self):
15342 # This raises errors.OpPrereqError on its own:
15343 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15345 self.needed_locks = {
15346 locking.LEVEL_INSTANCE: [],
15347 locking.LEVEL_NODEGROUP: [self.group_uuid],
15348 }
15350 self.share_locks[locking.LEVEL_INSTANCE] = 1
15352 def DeclareLocks(self, level):
15353 if level == locking.LEVEL_INSTANCE:
15354 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15356 # Lock instances optimistically, needs verification once group lock has
15357 # been acquired
15358 self.needed_locks[locking.LEVEL_INSTANCE] = \
15359 self.cfg.GetNodeGroupInstances(self.group_uuid)
15361 @staticmethod
15362 def _UpdateAndVerifyDiskParams(old, new):
15363 """Updates and verifies disk parameters.
15366 new_params = _GetUpdatedParams(old, new)
15367 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15370 def CheckPrereq(self):
15371 """Check prerequisites.
15374 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15376 # Check if locked instances are still correct
15377 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15379 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15380 cluster = self.cfg.GetClusterInfo()
15382 if self.group is None:
15383 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15384 (self.op.group_name, self.group_uuid))
15386 if self.op.ndparams:
15387 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15388 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15389 self.new_ndparams = new_ndparams
15391 if self.op.diskparams:
15392 diskparams = self.group.diskparams
15393 uavdp = self._UpdateAndVerifyDiskParams
15394 # For each disktemplate subdict update and verify the values
15395 new_diskparams = dict((dt,
15396 uavdp(diskparams.get(dt, {}),
15397 self.op.diskparams[dt]))
15398 for dt in constants.DISK_TEMPLATES
15399 if dt in self.op.diskparams)
15400 # As we've all subdicts of diskparams ready, lets merge the actual
15401 # dict with all updated subdicts
15402 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15403 try:
15404 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15405 except errors.OpPrereqError, err:
15406 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
15407 errors.ECODE_INVAL)
15409 if self.op.hv_state:
15410 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15411 self.group.hv_state_static)
15413 if self.op.disk_state:
15414 self.new_disk_state = \
15415 _MergeAndVerifyDiskState(self.op.disk_state,
15416 self.group.disk_state_static)
15418 if self.op.ipolicy:
15419 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15420 self.op.ipolicy,
15421 group_policy=True)
15423 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15424 inst_filter = lambda inst: inst.name in owned_instances
15425 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15426 gmi = ganeti.masterd.instance
15427 violations = \
15428 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15429 self.group),
15430 new_ipolicy, instances, self.cfg)
15432 if violations:
15433 self.LogWarning("After the ipolicy change the following instances"
15434 " violate them: %s",
15435 utils.CommaJoin(violations))
15437 def BuildHooksEnv(self):
15438 """Build hooks env.
15442 "GROUP_NAME": self.op.group_name,
15443 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15446 def BuildHooksNodes(self):
15447 """Build hooks nodes.
15450 mn = self.cfg.GetMasterNode()
15451 return ([mn], [mn])
15453 def Exec(self, feedback_fn):
15454 """Modifies the node group.
15459 if self.op.ndparams:
15460 self.group.ndparams = self.new_ndparams
15461 result.append(("ndparams", str(self.group.ndparams)))
15463 if self.op.diskparams:
15464 self.group.diskparams = self.new_diskparams
15465 result.append(("diskparams", str(self.group.diskparams)))
15467 if self.op.alloc_policy:
15468 self.group.alloc_policy = self.op.alloc_policy
15470 if self.op.hv_state:
15471 self.group.hv_state_static = self.new_hv_state
15473 if self.op.disk_state:
15474 self.group.disk_state_static = self.new_disk_state
15476 if self.op.ipolicy:
15477 self.group.ipolicy = self.new_ipolicy
15479 self.cfg.Update(self.group, feedback_fn)
15481 return result
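# The returned "result" is a list of (parameter, new value) pairs describing
# what was changed, e.g. (illustrative only): [("alloc_policy", "preferred")].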
15483 class LUGroupRemove(LogicalUnit):
15484 HPATH = "group-remove"
15485 HTYPE = constants.HTYPE_GROUP
15486 REQ_BGL = False
15488 def ExpandNames(self):
15489 # This raises errors.OpPrereqError on its own:
15490 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15491 self.needed_locks = {
15492 locking.LEVEL_NODEGROUP: [self.group_uuid],
15493 }
15495 def CheckPrereq(self):
15496 """Check prerequisites.
15498 This checks that the given group name exists as a node group, that it is
15499 empty (i.e., contains no nodes), and that it is not the last group of the
15500 cluster.
15502 """
15503 # Verify that the group is empty.
15504 group_nodes = [node.name
15505 for node in self.cfg.GetAllNodesInfo().values()
15506 if node.group == self.group_uuid]
15508 if group_nodes:
15509 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15510 " nodes: %s" %
15511 (self.op.group_name,
15512 utils.CommaJoin(utils.NiceSort(group_nodes))),
15513 errors.ECODE_STATE)
15515 # Verify the cluster would not be left group-less.
15516 if len(self.cfg.GetNodeGroupList()) == 1:
15517 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15518 " removed" % self.op.group_name,
15519 errors.ECODE_STATE)
15521 def BuildHooksEnv(self):
15522 """Build hooks env.
15526 "GROUP_NAME": self.op.group_name,
15529 def BuildHooksNodes(self):
15530 """Build hooks nodes.
15533 mn = self.cfg.GetMasterNode()
15534 return ([mn], [mn])
15536 def Exec(self, feedback_fn):
15537 """Remove the node group.
15541 self.cfg.RemoveNodeGroup(self.group_uuid)
15542 except errors.ConfigurationError:
15543 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15544 (self.op.group_name, self.group_uuid))
15546 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15549 class LUGroupRename(LogicalUnit):
15550 HPATH = "group-rename"
15551 HTYPE = constants.HTYPE_GROUP
15552 REQ_BGL = False
15554 def ExpandNames(self):
15555 # This raises errors.OpPrereqError on its own:
15556 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15558 self.needed_locks = {
15559 locking.LEVEL_NODEGROUP: [self.group_uuid],
15560 }
15562 def CheckPrereq(self):
15563 """Check prerequisites.
15565 Ensures requested new name is not yet used.
15567 """
15568 try:
15569 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15570 except errors.OpPrereqError:
15571 pass
15572 else:
15573 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15574 " node group (UUID: %s)" %
15575 (self.op.new_name, new_name_uuid),
15576 errors.ECODE_EXISTS)
15578 def BuildHooksEnv(self):
15579 """Build hooks env.
15583 "OLD_NAME": self.op.group_name,
15584 "NEW_NAME": self.op.new_name,
15587 def BuildHooksNodes(self):
15588 """Build hooks nodes.
15591 mn = self.cfg.GetMasterNode()
15593 all_nodes = self.cfg.GetAllNodesInfo()
15594 all_nodes.pop(mn, None)
15596 run_nodes = [mn]
15597 run_nodes.extend(node.name for node in all_nodes.values()
15598 if node.group == self.group_uuid)
15600 return (run_nodes, run_nodes)
15602 def Exec(self, feedback_fn):
15603 """Rename the node group.
15606 group = self.cfg.GetNodeGroup(self.group_uuid)
15609 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15610 (self.op.group_name, self.group_uuid))
15612 group.name = self.op.new_name
15613 self.cfg.Update(group, feedback_fn)
15615 return self.op.new_name
15618 class LUGroupEvacuate(LogicalUnit):
15619 HPATH = "group-evacuate"
15620 HTYPE = constants.HTYPE_GROUP
15621 REQ_BGL = False
15623 def ExpandNames(self):
15624 # This raises errors.OpPrereqError on its own:
15625 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15627 if self.op.target_groups:
15628 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15629 self.op.target_groups)
15630 else:
15631 self.req_target_uuids = []
15633 if self.group_uuid in self.req_target_uuids:
15634 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15635 " as a target group (targets are %s)" %
15637 utils.CommaJoin(self.req_target_uuids)),
15638 errors.ECODE_INVAL)
15640 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15642 self.share_locks = _ShareAll()
15643 self.needed_locks = {
15644 locking.LEVEL_INSTANCE: [],
15645 locking.LEVEL_NODEGROUP: [],
15646 locking.LEVEL_NODE: [],
15647 }
15649 def DeclareLocks(self, level):
15650 if level == locking.LEVEL_INSTANCE:
15651 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15653 # Lock instances optimistically, needs verification once node and group
15654 # locks have been acquired
15655 self.needed_locks[locking.LEVEL_INSTANCE] = \
15656 self.cfg.GetNodeGroupInstances(self.group_uuid)
15658 elif level == locking.LEVEL_NODEGROUP:
15659 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15661 if self.req_target_uuids:
15662 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15664 # Lock all groups used by instances optimistically; this requires going
15665 # via the node before it's locked, requiring verification later on
15666 lock_groups.update(group_uuid
15667 for instance_name in
15668 self.owned_locks(locking.LEVEL_INSTANCE)
15669 for group_uuid in
15670 self.cfg.GetInstanceNodeGroups(instance_name))
15671 else:
15672 # No target groups, need to lock all of them
15673 lock_groups = locking.ALL_SET
15675 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15677 elif level == locking.LEVEL_NODE:
15678 # This will only lock the nodes in the group to be evacuated which
15679 # contain actual instances
15680 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15681 self._LockInstancesNodes()
15683 # Lock all nodes in group to be evacuated and target groups
15684 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15685 assert self.group_uuid in owned_groups
15686 member_nodes = [node_name
15687 for group in owned_groups
15688 for node_name in self.cfg.GetNodeGroup(group).members]
15689 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15691 def CheckPrereq(self):
15692 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15693 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15694 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15696 assert owned_groups.issuperset(self.req_target_uuids)
15697 assert self.group_uuid in owned_groups
15699 # Check if locked instances are still correct
15700 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15702 # Get instance information
15703 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15705 # Check if node groups for locked instances are still correct
15706 _CheckInstancesNodeGroups(self.cfg, self.instances,
15707 owned_groups, owned_nodes, self.group_uuid)
15709 if self.req_target_uuids:
15710 # User requested specific target groups
15711 self.target_uuids = self.req_target_uuids
15713 # All groups except the one to be evacuated are potential targets
15714 self.target_uuids = [group_uuid for group_uuid in owned_groups
15715 if group_uuid != self.group_uuid]
15717 if not self.target_uuids:
15718 raise errors.OpPrereqError("There are no possible target groups",
15719 errors.ECODE_INVAL)
15721 def BuildHooksEnv(self):
15722 """Build hooks env.
15726 "GROUP_NAME": self.op.group_name,
15727 "TARGET_GROUPS": " ".join(self.target_uuids),
15730 def BuildHooksNodes(self):
15731 """Build hooks nodes.
15734 mn = self.cfg.GetMasterNode()
15736 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15738 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15740 return (run_nodes, run_nodes)
15742 def Exec(self, feedback_fn):
15743 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15745 assert self.group_uuid not in self.target_uuids
15747 req = iallocator.IAReqGroupChange(instances=instances,
15748 target_groups=self.target_uuids)
15749 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15751 ial.Run(self.op.iallocator)
15753 if not ial.success:
15754 raise errors.OpPrereqError("Can't compute group evacuation using"
15755 " iallocator '%s': %s" %
15756 (self.op.iallocator, ial.info),
15757 errors.ECODE_NORES)
15759 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15761 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15762 len(jobs), self.op.group_name)
15764 return ResultWithJobs(jobs)
15767 class TagsLU(NoHooksLU): # pylint: disable=W0223
15768 """Generic tags LU.
15770 This is an abstract class which is the parent of all the other tags LUs.
15772 """
15773 def ExpandNames(self):
15774 self.group_uuid = None
15775 self.needed_locks = {}
15777 if self.op.kind == constants.TAG_NODE:
15778 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15779 lock_level = locking.LEVEL_NODE
15780 lock_name = self.op.name
15781 elif self.op.kind == constants.TAG_INSTANCE:
15782 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15783 lock_level = locking.LEVEL_INSTANCE
15784 lock_name = self.op.name
15785 elif self.op.kind == constants.TAG_NODEGROUP:
15786 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15787 lock_level = locking.LEVEL_NODEGROUP
15788 lock_name = self.group_uuid
15789 elif self.op.kind == constants.TAG_NETWORK:
15790 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15791 lock_level = locking.LEVEL_NETWORK
15792 lock_name = self.network_uuid
15793 else:
15794 lock_level = None
15795 lock_name = None
15797 if lock_level and getattr(self.op, "use_locking", True):
15798 self.needed_locks[lock_level] = lock_name
15800 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15801 # not possible to acquire the BGL based on opcode parameters)
15803 def CheckPrereq(self):
15804 """Check prerequisites.
15807 if self.op.kind == constants.TAG_CLUSTER:
15808 self.target = self.cfg.GetClusterInfo()
15809 elif self.op.kind == constants.TAG_NODE:
15810 self.target = self.cfg.GetNodeInfo(self.op.name)
15811 elif self.op.kind == constants.TAG_INSTANCE:
15812 self.target = self.cfg.GetInstanceInfo(self.op.name)
15813 elif self.op.kind == constants.TAG_NODEGROUP:
15814 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15815 elif self.op.kind == constants.TAG_NETWORK:
15816 self.target = self.cfg.GetNetwork(self.network_uuid)
15817 else:
15818 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15819 str(self.op.kind), errors.ECODE_INVAL)
15822 class LUTagsGet(TagsLU):
15823 """Returns the tags of a given object.
15825 """
15826 REQ_BGL = False
15828 def ExpandNames(self):
15829 TagsLU.ExpandNames(self)
15831 # Share locks as this is only a read operation
15832 self.share_locks = _ShareAll()
15834 def Exec(self, feedback_fn):
15835 """Returns the tag list.
15838 return list(self.target.GetTags())
15841 class LUTagsSearch(NoHooksLU):
15842 """Searches the tags for a given pattern.
15844 """
15845 REQ_BGL = False
15847 def ExpandNames(self):
15848 self.needed_locks = {}
15850 def CheckPrereq(self):
15851 """Check prerequisites.
15853 This checks the pattern passed for validity by compiling it.
15855 """
15856 try:
15857 self.re = re.compile(self.op.pattern)
15858 except re.error, err:
15859 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15860 (self.op.pattern, err), errors.ECODE_INVAL)
15862 def Exec(self, feedback_fn):
15863 """Returns the tag list.
15867 tgts = [("/cluster", cfg.GetClusterInfo())]
15868 ilist = cfg.GetAllInstancesInfo().values()
15869 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15870 nlist = cfg.GetAllNodesInfo().values()
15871 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15872 tgts.extend(("/nodegroup/%s" % n.name, n)
15873 for n in cfg.GetAllNodeGroupsInfo().values())
15874 results = []
15875 for path, target in tgts:
15876 for tag in target.GetTags():
15877 if self.re.search(tag):
15878 results.append((path, tag))
15879 return results
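# Illustrative result (example data): searching for the pattern "^db" could
# return [("/instances/inst1.example.com", "db-server"), ("/cluster", "dbfarm")].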
15882 class LUTagsSet(TagsLU):
15883 """Sets a tag on a given object.
15885 """
15886 REQ_BGL = False
15888 def CheckPrereq(self):
15889 """Check prerequisites.
15891 This checks the type and length of the tag name and value.
15894 TagsLU.CheckPrereq(self)
15895 for tag in self.op.tags:
15896 objects.TaggableObject.ValidateTag(tag)
15898 def Exec(self, feedback_fn):
15899 """Sets the tag.
15901 """
15902 try:
15903 for tag in self.op.tags:
15904 self.target.AddTag(tag)
15905 except errors.TagError, err:
15906 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15907 self.cfg.Update(self.target, feedback_fn)
15910 class LUTagsDel(TagsLU):
15911 """Delete a list of tags from a given object.
15913 """
15914 REQ_BGL = False
15916 def CheckPrereq(self):
15917 """Check prerequisites.
15919 This checks that we have the given tag.
15922 TagsLU.CheckPrereq(self)
15923 for tag in self.op.tags:
15924 objects.TaggableObject.ValidateTag(tag)
15925 del_tags = frozenset(self.op.tags)
15926 cur_tags = self.target.GetTags()
15928 diff_tags = del_tags - cur_tags
15929 if diff_tags:
15930 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15931 raise errors.OpPrereqError("Tag(s) %s not found" %
15932 (utils.CommaJoin(diff_names), ),
15933 errors.ECODE_NOENT)
15935 def Exec(self, feedback_fn):
15936 """Remove the tag from the object.
15939 for tag in self.op.tags:
15940 self.target.RemoveTag(tag)
15941 self.cfg.Update(self.target, feedback_fn)
15944 class LUTestDelay(NoHooksLU):
15945 """Sleep for a specified amount of time.
15947 This LU sleeps on the master and/or nodes for a specified amount of
15948 time.
15950 """
15951 REQ_BGL = False
15953 def ExpandNames(self):
15954 """Expand names and set required locks.
15956 This expands the node list, if any.
15958 """
15959 self.needed_locks = {}
15960 if self.op.on_nodes:
15961 # _GetWantedNodes can be used here, but is not always appropriate to use
15962 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15963 # more information.
15964 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15965 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15967 def _TestDelay(self):
15968 """Do the actual sleep.
15971 if self.op.on_master:
15972 if not utils.TestDelay(self.op.duration):
15973 raise errors.OpExecError("Error during master delay test")
15974 if self.op.on_nodes:
15975 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15976 for node, node_result in result.items():
15977 node_result.Raise("Failure during rpc call to node %s" % node)
15979 def Exec(self, feedback_fn):
15980 """Execute the test delay opcode, with the wanted repetitions.
15983 if self.op.repeat == 0:
15986 top_value = self.op.repeat - 1
15987 for i in range(self.op.repeat):
15988 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15992 class LURestrictedCommand(NoHooksLU):
15993 """Logical unit for executing restricted commands.
15995 """
15996 REQ_BGL = False
15998 def ExpandNames(self):
15999 if self.op.nodes:
16000 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
16002 self.needed_locks = {
16003 locking.LEVEL_NODE: self.op.nodes,
16004 }
16005 self.share_locks = {
16006 locking.LEVEL_NODE: not self.op.use_locking,
16007 }
16009 def CheckPrereq(self):
16010 """Check prerequisites.
16014 def Exec(self, feedback_fn):
16015 """Execute restricted command and return output.
16018 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
16020 # Check if correct locks are held
16021 assert set(self.op.nodes).issubset(owned_nodes)
16023 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
16025 result = []
16027 for node_name in self.op.nodes:
16028 nres = rpcres[node_name]
16029 if nres.fail_msg:
16030 msg = ("Command '%s' on node '%s' failed: %s" %
16031 (self.op.command, node_name, nres.fail_msg))
16032 result.append((False, msg))
16033 else:
16034 result.append((True, nres.payload))
16036 return result
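# Illustrative result (example data): one (success, payload-or-message) tuple
# per node, in the order of self.op.nodes, e.g.
#   [(True, "command output"), (False, "Command 'x' on node 'node2' failed: ...")]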
16039 class LUTestJqueue(NoHooksLU):
16040 """Utility LU to test some aspects of the job queue.
16042 """
16043 REQ_BGL = False
16045 # Must be lower than default timeout for WaitForJobChange to see whether it
16046 # notices changed jobs
16047 _CLIENT_CONNECT_TIMEOUT = 20.0
16048 _CLIENT_CONFIRM_TIMEOUT = 60.0
16050 @classmethod
16051 def _NotifyUsingSocket(cls, cb, errcls):
16052 """Opens a Unix socket and waits for another program to connect.
16054 @type cb: callable
16055 @param cb: Callback to send socket name to client
16056 @type errcls: class
16057 @param errcls: Exception class to use for errors
16059 """
16060 # Using a temporary directory as there's no easy way to create temporary
16061 # sockets without writing a custom loop around tempfile.mktemp and
16062 # socket.bind
16063 tmpdir = tempfile.mkdtemp()
16064 try:
16065 tmpsock = utils.PathJoin(tmpdir, "sock")
16067 logging.debug("Creating temporary socket at %s", tmpsock)
16068 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
16069 try:
16070 sock.bind(tmpsock)
16071 sock.listen(1)
16073 # Send details to client
16074 cb(tmpsock)
16076 # Wait for client to connect before continuing
16077 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
16078 try:
16079 (conn, _) = sock.accept()
16080 except socket.error, err:
16081 raise errcls("Client didn't connect in time (%s)" % err)
16085 # Remove as soon as client is connected
16086 shutil.rmtree(tmpdir)
16088 # Wait for client to close
16091 # pylint: disable=E1101
16092 # Instance of '_socketobject' has no ... member
16093 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
16095 except socket.error, err:
16096 raise errcls("Client failed to confirm notification (%s)" % err)
16100 def _SendNotification(self, test, arg, sockname):
16101 """Sends a notification to the client.
16104 @param test: Test name
16105 @param arg: Test argument (depends on test)
16106 @type sockname: string
16107 @param sockname: Socket path
16110 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16112 def _Notify(self, prereq, test, arg):
16113 """Notifies the client of a test.
16116 @param prereq: Whether this is a prereq-phase test
16118 @param test: Test name
16119 @param arg: Test argument (depends on test)
16123 errcls = errors.OpPrereqError
16125 errcls = errors.OpExecError
16127 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16131 def CheckArguments(self):
16132 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16133 self.expandnames_calls = 0
16135 def ExpandNames(self):
16136 checkargs_calls = getattr(self, "checkargs_calls", 0)
16137 if checkargs_calls < 1:
16138 raise errors.ProgrammerError("CheckArguments was not called")
16140 self.expandnames_calls += 1
16142 if self.op.notify_waitlock:
16143 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16145 self.LogInfo("Expanding names")
16147 # Get lock on master node (just to get a lock, not for a particular reason)
16148 self.needed_locks = {
16149 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16150 }
16152 def Exec(self, feedback_fn):
16153 if self.expandnames_calls < 1:
16154 raise errors.ProgrammerError("ExpandNames was not called")
16156 if self.op.notify_exec:
16157 self._Notify(False, constants.JQT_EXEC, None)
16159 self.LogInfo("Executing")
16161 if self.op.log_messages:
16162 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16163 for idx, msg in enumerate(self.op.log_messages):
16164 self.LogInfo("Sending log message %s", idx + 1)
16165 feedback_fn(constants.JQT_MSGPREFIX + msg)
16166 # Report how many test messages have been sent
16167 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16169 if self.op.fail:
16170 raise errors.OpExecError("Opcode failure was requested")
16172 return True
16175 class LUTestAllocator(NoHooksLU):
16176 """Run allocator tests.
16178 This LU runs the allocator tests
16181 def CheckPrereq(self):
16182 """Check prerequisites.
16184 This checks the opcode parameters depending on the director and mode test.
16187 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16188 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16189 for attr in ["memory", "disks", "disk_template",
16190 "os", "tags", "nics", "vcpus"]:
16191 if not hasattr(self.op, attr):
16192 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16193 attr, errors.ECODE_INVAL)
16194 iname = self.cfg.ExpandInstanceName(self.op.name)
16195 if iname is not None:
16196 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16197 iname, errors.ECODE_EXISTS)
16198 if not isinstance(self.op.nics, list):
16199 raise errors.OpPrereqError("Invalid parameter 'nics'",
16200 errors.ECODE_INVAL)
16201 if not isinstance(self.op.disks, list):
16202 raise errors.OpPrereqError("Invalid parameter 'disks'",
16203 errors.ECODE_INVAL)
16204 for row in self.op.disks:
16205 if (not isinstance(row, dict) or
16206 constants.IDISK_SIZE not in row or
16207 not isinstance(row[constants.IDISK_SIZE], int) or
16208 constants.IDISK_MODE not in row or
16209 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16210 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16211 " parameter", errors.ECODE_INVAL)
16212 if self.op.hypervisor is None:
16213 self.op.hypervisor = self.cfg.GetHypervisorType()
16214 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16215 fname = _ExpandInstanceName(self.cfg, self.op.name)
16216 self.op.name = fname
16217 self.relocate_from = \
16218 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16219 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16220 constants.IALLOCATOR_MODE_NODE_EVAC):
16221 if not self.op.instances:
16222 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16223 self.op.instances = _GetWantedInstances(self, self.op.instances)
16224 else:
16225 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16226 self.op.mode, errors.ECODE_INVAL)
16228 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16229 if self.op.iallocator is None:
16230 raise errors.OpPrereqError("Missing allocator name",
16231 errors.ECODE_INVAL)
16232 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16233 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16234 self.op.direction, errors.ECODE_INVAL)
16236 def Exec(self, feedback_fn):
16237 """Run the allocator test.
16240 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16241 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16242 memory=self.op.memory,
16243 disks=self.op.disks,
16244 disk_template=self.op.disk_template,
16245 os=self.op.os,
16246 tags=self.op.tags,
16247 nics=self.op.nics,
16248 vcpus=self.op.vcpus,
16249 spindle_use=self.op.spindle_use,
16250 hypervisor=self.op.hypervisor,
16251 node_whitelist=None)
16252 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16253 req = iallocator.IAReqRelocate(name=self.op.name,
16254 relocate_from=list(self.relocate_from))
16255 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16256 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16257 target_groups=self.op.target_groups)
16258 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16259 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16260 evac_mode=self.op.evac_mode)
16261 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16262 disk_template = self.op.disk_template
16263 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16264 memory=self.op.memory,
16265 disks=self.op.disks,
16266 disk_template=disk_template,
16267 os=self.op.os,
16268 tags=self.op.tags,
16269 nics=self.op.nics,
16270 vcpus=self.op.vcpus,
16271 spindle_use=self.op.spindle_use,
16272 hypervisor=self.op.hypervisor)
16273 for idx in range(self.op.count)]
16274 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16275 else:
16276 raise errors.ProgrammerError("Uncaught mode %s in"
16277 " LUTestAllocator.Exec", self.op.mode)
16279 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16280 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16281 result = ial.in_text
16282 else:
16283 ial.Run(self.op.iallocator, validate=False)
16284 result = ial.out_text
16286 return result
16288 class LUNetworkAdd(LogicalUnit):
16289 """Logical unit for creating networks.
16292 HPATH = "network-add"
16293 HTYPE = constants.HTYPE_NETWORK
16296 def BuildHooksNodes(self):
16297 """Build hooks nodes.
16300 mn = self.cfg.GetMasterNode()
16301 return ([mn], [mn])

  def CheckArguments(self):
    if self.op.mac_prefix:
      self.op.mac_prefix = \
        utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
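      # The helper validates the three-octet "aa:bb:cc" format and returns a
      # normalized copy (e.g. "AA:0B:CC" -> "aa:0b:cc"; illustrative values,
      # assuming normalization lower-cases the prefix).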

  def ExpandNames(self):
    self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())

    if self.op.conflicts_check:
      self.share_locks[locking.LEVEL_NODE] = 1
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {}

    self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
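    # Entries in add_locks are locks that do not exist yet; the processor
    # creates and acquires them for the object being added (here the freshly
    # generated network UUID).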

  def CheckPrereq(self):
    if self.op.network is None:
      raise errors.OpPrereqError("Network must be given",
                                 errors.ECODE_INVAL)

    try:
      existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired network name '%s' already exists as a"
                                 " network (UUID: %s)" %
                                 (self.op.network_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.op.network,
      "gateway": self.op.gateway,
      "network6": self.op.network6,
      "gateway6": self.op.gateway6,
      "mac_prefix": self.op.mac_prefix,
      "tags": self.op.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def Exec(self, feedback_fn):
    """Add the ip pool to the cluster.

    """
    nobj = objects.Network(name=self.op.network_name,
                           network=self.op.network,
                           gateway=self.op.gateway,
                           network6=self.op.network6,
                           gateway6=self.op.gateway6,
                           mac_prefix=self.op.mac_prefix,
                           uuid=self.network_uuid)
    # Initialize the associated address pool
    try:
      pool = network.AddressPool.InitializeNetwork(nobj)
    except errors.AddressPoolError, err:
      raise errors.OpExecError("Cannot create IP address pool for network"
                               " '%s': %s" % (self.op.network_name, err))

    # Check if we need to reserve the nodes and the cluster master IP
    # These may not be allocated to any instances in routed mode, as
    # they wouldn't function anyway.
    if self.op.conflicts_check:
      for node in self.cfg.GetAllNodesInfo().values():
        for ip in [node.primary_ip, node.secondary_ip]:
          try:
            if pool.Contains(ip):
              pool.Reserve(ip)
              self.LogInfo("Reserved IP address of node '%s' (%s)",
                           node.name, ip)
          except errors.AddressPoolError, err:
            self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
                            ip, node.name, err)

      master_ip = self.cfg.GetClusterInfo().master_ip
      try:
        if pool.Contains(master_ip):
          pool.Reserve(master_ip)
          self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
      except errors.AddressPoolError, err:
        self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
                        master_ip, err)

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
                                   (ip, err))

    if self.op.tags:
      for tag in self.op.tags:
        nobj.AddTag(tag)

    self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NETWORK]
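    # The lock created via add_locks would normally be dropped again when the
    # LU finishes; removing it from remove_locks keeps it registered now that
    # the network actually exists.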


class LUNetworkRemove(LogicalUnit):
  HPATH = "network-remove"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.share_locks[locking.LEVEL_NODEGROUP] = 1
    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given network name exists as a network and that it
    is not connected to any node group.

    """
    # Verify that the network is not connected.
    node_groups = [group.name
                   for group in self.cfg.GetAllNodeGroupsInfo().values()
                   if self.network_uuid in group.networks]

    if node_groups:
      self.LogWarning("Network '%s' is connected to the following"
                      " node groups: %s" %
                      (self.op.network_name,
                       utils.CommaJoin(utils.NiceSort(node_groups))))
      raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "NETWORK_NAME": self.op.network_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    try:
      self.cfg.RemoveNetwork(self.network_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    # TODO: reserve/release via temporary reservation manager
    #       extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway
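
    # The reservation changes below only warn on per-address errors, so one
    # bad entry does not abort the remaining modifications.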
    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve the gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking
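
    # Resolve user-supplied names to UUIDs up front: the configuration keys
    # networks by UUID, while opcodes refer to them by name.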
    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network in network_uuids:
            network_to_instances[nic.network].append(instance.name)
            break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
16711 "external_reservations":
16712 utils.CommaJoin(pool.GetExternalReservations()),


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
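

# Note: like _NetworkQuery above, LUNetworkQuery only wires the generic query
# machinery together; a client would submit e.g. OpNetworkQuery with
# output_fields=["name", "free_count"] (illustrative field names).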


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a node group.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    if self.op.conflicts_check:
      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True

    # check only if not already connected
    elif self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to", owned_instances)

  def Exec(self, feedback_fn):
    # Connect the network and update the group only if not already connected
    if not self.connected:
      self.group.networks[self.network_uuid] = self.netparams
      self.cfg.Update(self.group, feedback_fn)
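
    # (Connecting an already connected network is thus a warned no-op rather
    # than an error; see CheckPrereq above.)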


def _NetworkConflictCheck(lu, check_fn, action, instances):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a node group.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False

    # The conflict check is only needed if the network is actually connected
    else:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
                            "disconnect from", owned_instances)

  def Exec(self, feedback_fn):
    # Disconnect the network and update the group only if network is connected
    if self.connected:
      del self.group.networks[self.network_uuid]
      self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)


def _CheckForConflictingIp(lu, ip, node):
  """In case of a conflicting IP address, raise an error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)
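
  # No conflicting network found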
  return (None, None)