# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar

# C0302: since we have waaaay too many lines in this module

import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
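

# Illustrative sketch, not part of the original module: an LU's Exec can hand
# follow-up work to the job queue by returning a ResultWithJobs; the opcode
# and instance name below are hypothetical examples.
def _ExampleExecReturningJobs():
  jobs = [[opcodes.OpInstanceStartup(instance_name="instance1.example.com")]]
  # mcpu._ProcessResult submits these jobs and adds their IDs to the result
  return ResultWithJobs(jobs)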


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.add_locks = {}
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
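

# Illustrative sketch, not part of the original module: how an LU typically
# combines _ExpandAndLockInstance with _LockInstancesNodes. The class name is
# hypothetical; real LUs also define HPATH/HTYPE and the remaining methods.
class _ExampleInstanceLU(LogicalUnit): # pylint: disable=W0223
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks can only be computed once the instance lock is held, so
    # declare an empty list here and fill it in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()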


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
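

# Illustrative sketch, not part of the original module: a minimal tasklet.
# The class name and the check performed are hypothetical examples.
class _ExampleTasklet(Tasklet):
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    if self.cfg.GetInstanceInfo(self.instance_name) is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name, errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do for %s" % self.instance_name)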
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
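

# Illustrative sketch, not part of the original module: an LU that only reads
# data can declare every lock level shared. The class name is hypothetical.
class _ExampleReadOnlyLU(NoHooksLU):
  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }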


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @rtype: bool
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
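

# Illustrative sketch, not part of the original module: merging parameter
# overrides with _GetUpdatedParams. The parameter names are arbitrary.
def _ExampleGetUpdatedParamsUsage():
  old = {"kernel_path": "/boot/vmlinuz", "acpi": True}
  update = {"acpi": False, "kernel_path": constants.VALUE_DEFAULT}
  # "acpi" is overridden; "kernel_path" is dropped from the dict so the
  # cluster-level default applies again
  return _GetUpdatedParams(old, update) # -> {"acpi": False}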


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          # in a nicer way
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
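

# Illustrative sketch, not part of the original module: once the wanted nodes
# are known, an LU can drop all other node locks it acquired optimistically.
def _ExampleReleaseLocksUsage(lu, wanted_nodes):
  # Keep only the locks for wanted_nodes, release every other node lock
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=wanted_nodes)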


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
    value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
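

# Illustrative sketch, not part of the original module: the returned dict maps
# (node name, volume name) pairs to instance names; all names below are
# hypothetical.
#
#   {("node1.example.com", "xenvg/disk0"): "instance1.example.com",
#    ("node2.example.com", "xenvg/disk0_meta"): "instance1.example.com"}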


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # use on the PV)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
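

# Illustrative sketch, not part of the original module: checking a single
# memory value against a policy. With bounds min=128 and max=32768 this
# returns "memory-size value 65536 is not in range [128, 32768]"; an
# in-range value returns None.
def _ExampleComputeMinMaxSpecUsage(ipolicy):
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 65536)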


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not violate the old one

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if network_type:
    env["NETWORK_TYPE"] = network_type
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, network) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
        if netinfo:
          nobj = objects.Network.FromDict(netinfo)
          if nobj.network:
            env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
          if nobj.gateway:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
          if nobj.network6:
            env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
          if nobj.gateway6:
            env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
          if nobj.mac_prefix:
            env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
          if nobj.network_type:
            env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
          if nobj.tags:
            env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  ip = nic.ip
  mac = nic.mac
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net = nic.network
  netinfo = None
  if net:
    net_uuid = lu.cfg.LookupNetwork(net)
    if net_uuid:
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes the set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1942 def _VerifyCertificate(filename):
1943 """Verifies a certificate for L{LUClusterVerifyConfig}.
1945 @type filename: string
1946 @param filename: Path to PEM file
1950 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1951 utils.ReadFile(filename))
1952 except Exception, err: # pylint: disable=W0703
1953 return (LUClusterVerifyConfig.ETYPE_ERROR,
1954 "Failed to load X509 certificate %s: %s" % (filename, err))
1957 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1958 constants.SSL_CERT_EXPIRATION_ERROR)
1961 fnamemsg = "While verifying %s: %s" % (filename, msg)
1966 return (None, fnamemsg)
1967 elif errcode == utils.CERT_WARNING:
1968 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1969 elif errcode == utils.CERT_ERROR:
1970 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1972 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
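
# The WARN/ERROR split above comes from comparing the certificate's
# remaining lifetime against two thresholds. A hedged sketch of that
# classification (the day counts are illustrative, not the module
# constants; the time module is already imported by this module):
def _example_classify_cert_expiry(not_after, now=None,
                                  warn_days=30, error_days=7):
  """Classify a certificate by the seconds left until 'not_after'."""
  if now is None:
    now = time.time()
  remaining = not_after - now
  if remaining < error_days * 86400:
    return "ERROR"
  elif remaining < warn_days * 86400:
    return "WARNING"
  return None  # certificate is fine
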
1975 def _GetAllHypervisorParameters(cluster, instances):
1976 """Compute the set of all hypervisor parameters.
1978 @type cluster: L{objects.Cluster}
1979 @param cluster: the cluster object
1980 @type instances: list of L{objects.Instance}
1981 @param instances: additional instances from which to obtain parameters
1982 @rtype: list of (origin, hypervisor, parameters)
1983 @return: a list with all parameters found, indicating the hypervisor they
1984 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1989 for hv_name in cluster.enabled_hypervisors:
1990 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1992 for os_name, os_hvp in cluster.os_hvp.items():
1993 for hv_name, hv_params in os_hvp.items():
1995 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1996 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1998 # TODO: collapse identical parameter values in a single one
1999 for instance in instances:
2000 if instance.hvparams:
2001 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2002 cluster.FillHV(instance)))
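
# The triples built above can be reproduced without config objects. A
# hedged sketch where plain dicts stand in for the cluster defaults,
# the per-OS overrides and the per-instance overrides:
def _example_collect_hvp(cluster_hvp, os_hvp, instance_hvp):
  """Flatten hypervisor parameters into (origin, hypervisor, params)."""
  data = []
  for hv, params in cluster_hvp.items():
    data.append(("cluster", hv, params))
  for os_name, per_hv in os_hvp.items():
    for hv, params in per_hv.items():
      data.append(("os %s" % os_name, hv, params))
  for inst_name, (hv, params) in instance_hvp.items():
    data.append(("instance %s" % inst_name, hv, params))
  return data
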
2007 class _VerifyErrors(object):
2008 """Mix-in for cluster/group verify LUs.
2010 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2011 self.op and self._feedback_fn to be available.)
2015 ETYPE_FIELD = "code"
2016 ETYPE_ERROR = "ERROR"
2017 ETYPE_WARNING = "WARNING"
2019 def _Error(self, ecode, item, msg, *args, **kwargs):
2020 """Format an error message.
2022 Based on the opcode's error_codes parameter, either format a
2023 parseable error code, or a simpler error string.
2025 This must be called only from Exec and functions called from Exec.
2028 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2029 itype, etxt, _ = ecode
2030 # If the error code is in the list of ignored errors, demote the error to a
2032 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2033 ltype = self.ETYPE_WARNING
2034 # first complete the msg
2037 # then format the whole message
2038 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2039 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2045 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2046 # and finally report it via the feedback_fn
2047 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2048 # only mark the operation as failed for ERROR-level messages, not WARN
2049 if ltype == self.ETYPE_ERROR:
2052 def _ErrorIf(self, cond, *args, **kwargs):
2053 """Log an error message if the passed condition is True.
2057 or self.op.debug_simulate_errors): # pylint: disable=E1101
2058 self._Error(*args, **kwargs)
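
# _Error above emits one of two formats. A hedged, standalone rendition
# of both (field order taken from the code; the helper name is made up):
def _example_format_error(ltype, etxt, itype, item, msg, parseable):
  """Format a verify error either machine-parseable or human-readable."""
  if parseable:
    # colon-separated, stable field order for scripts
    return "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
  item = " " + str(item) if item else ""
  return "%s: %s%s: %s" % (ltype, itype, item, msg)
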
2061 class LUClusterVerify(NoHooksLU):
2062 """Submits all jobs necessary to verify the cluster.
2067 def ExpandNames(self):
2068 self.needed_locks = {}
2070 def Exec(self, feedback_fn):
2073 if self.op.group_name:
2074 groups = [self.op.group_name]
2075 depends_fn = lambda: None
2077 groups = self.cfg.GetNodeGroupList()
2079 # Verify global configuration
2081 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2084 # Always depend on global verification
2085 depends_fn = lambda: [(-len(jobs), [])]
2088 [opcodes.OpClusterVerifyGroup(group_name=group,
2089 ignore_errors=self.op.ignore_errors,
2090 depends=depends_fn())]
2091 for group in groups)
2093 # Fix up all parameters
2094 for op in itertools.chain(*jobs): # pylint: disable=W0142
2095 op.debug_simulate_errors = self.op.debug_simulate_errors
2096 op.verbose = self.op.verbose
2097 op.error_codes = self.op.error_codes
2099 op.skip_checks = self.op.skip_checks
2100 except AttributeError:
2101 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2103 return ResultWithJobs(jobs)
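
# A note on the job layout built above: in the full-cluster case,
# depends_fn yields a *relative* dependency, and because len(jobs) grows
# while extend() consumes the generator, every group job points back at
# job number 0 (the config verification). A hedged sketch with plain
# dicts standing in for opcodes:
def _example_verify_jobs(groups):
  jobs = [[{"op": "cluster-verify-config"}]]
  jobs.extend([{"op": "group-verify", "group": g,
                # "-len(jobs) jobs before me" == the config job
                "depends": [(-len(jobs), [])]}]
              for g in groups)
  return jobs
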
2106 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2107 """Verifies the cluster config.
2112 def _VerifyHVP(self, hvp_data):
2113 """Verifies locally the syntax of the hypervisor parameters.
2116 for item, hv_name, hv_params in hvp_data:
2117 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2120 hv_class = hypervisor.GetHypervisorClass(hv_name)
2121 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2122 hv_class.CheckParameterSyntax(hv_params)
2123 except errors.GenericError, err:
2124 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2126 def ExpandNames(self):
2127 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2128 self.share_locks = _ShareAll()
2130 def CheckPrereq(self):
2131 """Check prerequisites.
2134 # Retrieve all information
2135 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2136 self.all_node_info = self.cfg.GetAllNodesInfo()
2137 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2139 def Exec(self, feedback_fn):
2140 """Verify integrity of cluster, performing various test on nodes.
2144 self._feedback_fn = feedback_fn
2146 feedback_fn("* Verifying cluster config")
2148 for msg in self.cfg.VerifyConfig():
2149 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2151 feedback_fn("* Verifying cluster certificate files")
2153 for cert_filename in pathutils.ALL_CERT_FILES:
2154 (errcode, msg) = _VerifyCertificate(cert_filename)
2155 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2157 feedback_fn("* Verifying hypervisor parameters")
2159 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2160 self.all_inst_info.values()))
2162 feedback_fn("* Verifying all nodes belong to an existing group")
2164 # We do this verification here because, should this bogus circumstance
2165 # occur, it would never be caught by VerifyGroup, which only acts on
2166 # nodes/instances reachable from existing node groups.
2168 dangling_nodes = set(node.name for node in self.all_node_info.values()
2169 if node.group not in self.all_group_info)
2171 dangling_instances = {}
2172 no_node_instances = []
2174 for inst in self.all_inst_info.values():
2175 if inst.primary_node in dangling_nodes:
2176 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2177 elif inst.primary_node not in self.all_node_info:
2178 no_node_instances.append(inst.name)
2183 utils.CommaJoin(dangling_instances.get(node.name,
2185 for node in dangling_nodes]
2187 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2189 "the following nodes (and their instances) belong to a non"
2190 " existing group: %s", utils.CommaJoin(pretty_dangling))
2192 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2194 "the following instances have a non-existing primary-node:"
2195 " %s", utils.CommaJoin(no_node_instances))
2200 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2201 """Verifies the status of a node group.
2204 HPATH = "cluster-verify"
2205 HTYPE = constants.HTYPE_CLUSTER
2208 _HOOKS_INDENT_RE = re.compile("^", re.M)
2210 class NodeImage(object):
2211 """A class representing the logical and physical status of a node.
2214 @ivar name: the node name to which this object refers
2215 @ivar volumes: a structure as returned from
2216 L{ganeti.backend.GetVolumeList} (runtime)
2217 @ivar instances: a list of running instances (runtime)
2218 @ivar pinst: list of configured primary instances (config)
2219 @ivar sinst: list of configured secondary instances (config)
2220 @ivar sbp: dictionary of {primary-node: list of instances} for all
2221 instances for which this node is secondary (config)
2222 @ivar mfree: free memory, as reported by hypervisor (runtime)
2223 @ivar dfree: free disk, as reported by the node (runtime)
2224 @ivar offline: the offline status (config)
2225 @type rpc_fail: boolean
2226 @ivar rpc_fail: whether the RPC verify call failed (overall,
2227 not whether the individual keys were correct) (runtime)
2228 @type lvm_fail: boolean
2229 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2230 @type hyp_fail: boolean
2231 @ivar hyp_fail: whether the RPC call didn't return the instance list
2232 @type ghost: boolean
2233 @ivar ghost: whether this is an unknown (ghost) node (config)
2234 @type os_fail: boolean
2235 @ivar os_fail: whether the RPC call didn't return valid OS data
2237 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2238 @type vm_capable: boolean
2239 @ivar vm_capable: whether the node can host instances
2241 @ivar pv_min: size in MiB of the smallest PVs
2243 @ivar pv_max: size in MiB of the biggest PVs
2246 def __init__(self, offline=False, name=None, vm_capable=True):
2255 self.offline = offline
2256 self.vm_capable = vm_capable
2257 self.rpc_fail = False
2258 self.lvm_fail = False
2259 self.hyp_fail = False
2261 self.os_fail = False
2266 def ExpandNames(self):
2267 # This raises errors.OpPrereqError on its own:
2268 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2270 # Get instances in node group; this is unsafe and needs verification later
2272 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2274 self.needed_locks = {
2275 locking.LEVEL_INSTANCE: inst_names,
2276 locking.LEVEL_NODEGROUP: [self.group_uuid],
2277 locking.LEVEL_NODE: [],
2279 # This opcode is run by watcher every five minutes and acquires all nodes
2280 # for a group. It doesn't run for a long time, so it's better to acquire
2281 # the node allocation lock as well.
2282 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2285 self.share_locks = _ShareAll()
2287 def DeclareLocks(self, level):
2288 if level == locking.LEVEL_NODE:
2289 # Get members of node group; this is unsafe and needs verification later
2290 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2292 all_inst_info = self.cfg.GetAllInstancesInfo()
2294 # In Exec(), we warn about mirrored instances that have primary and
2295 # secondary living in separate node groups. To fully verify that
2296 # volumes for these instances are healthy, we will need to do an
2297 # extra call to their secondaries. We ensure here those nodes will
2299 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2300 # Important: access only the instances whose lock is owned
2301 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2302 nodes.update(all_inst_info[inst].secondary_nodes)
2304 self.needed_locks[locking.LEVEL_NODE] = nodes
2306 def CheckPrereq(self):
2307 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2308 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2310 group_nodes = set(self.group_info.members)
2312 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2315 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2317 unlocked_instances = \
2318 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2321 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2322 utils.CommaJoin(unlocked_nodes),
2325 if unlocked_instances:
2326 raise errors.OpPrereqError("Missing lock for instances: %s" %
2327 utils.CommaJoin(unlocked_instances),
2330 self.all_node_info = self.cfg.GetAllNodesInfo()
2331 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2333 self.my_node_names = utils.NiceSort(group_nodes)
2334 self.my_inst_names = utils.NiceSort(group_instances)
2336 self.my_node_info = dict((name, self.all_node_info[name])
2337 for name in self.my_node_names)
2339 self.my_inst_info = dict((name, self.all_inst_info[name])
2340 for name in self.my_inst_names)
2342 # We detect here the nodes that will need the extra RPC calls for verifying
2343 # split LV volumes; they should be locked.
2344 extra_lv_nodes = set()
2346 for inst in self.my_inst_info.values():
2347 if inst.disk_template in constants.DTS_INT_MIRROR:
2348 for nname in inst.all_nodes:
2349 if self.all_node_info[nname].group != self.group_uuid:
2350 extra_lv_nodes.add(nname)
2352 unlocked_lv_nodes = \
2353 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2355 if unlocked_lv_nodes:
2356 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2357 utils.CommaJoin(unlocked_lv_nodes),
2359 self.extra_lv_nodes = list(extra_lv_nodes)
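
  # CheckPrereq above relies on "lock optimistically, verify later": the
  # config may have changed between ExpandNames and CheckPrereq, so
  # anything in the config that we do not own a lock for is an error. A
  # hedged sketch of that pattern (illustrative, not used by the LU):
  @staticmethod
  def _ExampleCheckOwnedLocks(needed, owned):
    missing = set(needed) - set(owned)
    if missing:
      raise errors.OpPrereqError("Missing lock for: %s" %
                                 utils.CommaJoin(sorted(missing)),
                                 errors.ECODE_STATE)
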
2361 def _VerifyNode(self, ninfo, nresult):
2362 """Perform some basic validation on data returned from a node.
2364 - check the result data structure is well formed and has all the
2366 - check ganeti version
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the results from the node
2372 @return: whether overall this call was successful (and we can expect
2373 reasonable values in the response)
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 # main result, nresult should be a non-empty dict
2380 test = not nresult or not isinstance(nresult, dict)
2381 _ErrorIf(test, constants.CV_ENODERPC, node,
2382 "unable to verify node: no data returned")
2386 # compares ganeti version
2387 local_version = constants.PROTOCOL_VERSION
2388 remote_version = nresult.get("version", None)
2389 test = not (remote_version and
2390 isinstance(remote_version, (list, tuple)) and
2391 len(remote_version) == 2)
2392 _ErrorIf(test, constants.CV_ENODERPC, node,
2393 "connection to node returned invalid data")
2397 test = local_version != remote_version[0]
2398 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2399 "incompatible protocol versions: master %s,"
2400 " node %s", local_version, remote_version[0])
2404 # node seems compatible, we can actually try to look into its results
2406 # full package version
2407 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2408 constants.CV_ENODEVERSION, node,
2409 "software version mismatch: master %s, node %s",
2410 constants.RELEASE_VERSION, remote_version[1],
2411 code=self.ETYPE_WARNING)
2413 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2414 if ninfo.vm_capable and isinstance(hyp_result, dict):
2415 for hv_name, hv_result in hyp_result.iteritems():
2416 test = hv_result is not None
2417 _ErrorIf(test, constants.CV_ENODEHV, node,
2418 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2420 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2421 if ninfo.vm_capable and isinstance(hvp_result, list):
2422 for item, hv_name, hv_result in hvp_result:
2423 _ErrorIf(True, constants.CV_ENODEHV, node,
2424 "hypervisor %s parameter verify failure (source %s): %s",
2425 hv_name, item, hv_result)
2427 test = nresult.get(constants.NV_NODESETUP,
2428 ["Missing NODESETUP results"])
2429 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2434 def _VerifyNodeTime(self, ninfo, nresult,
2435 nvinfo_starttime, nvinfo_endtime):
2436 """Check the node time.
2438 @type ninfo: L{objects.Node}
2439 @param ninfo: the node to check
2440 @param nresult: the remote results for the node
2441 @param nvinfo_starttime: the start time of the RPC call
2442 @param nvinfo_endtime: the end time of the RPC call
2446 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2448 ntime = nresult.get(constants.NV_TIME, None)
2450 ntime_merged = utils.MergeTime(ntime)
2451 except (ValueError, TypeError):
2452 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2455 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2456 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2457 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2458 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2462 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2463 "Node time diverges by at least %s from master node time",
2466 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2467 """Check the node LVM results and update info for cross-node checks.
2469 @type ninfo: L{objects.Node}
2470 @param ninfo: the node to check
2471 @param nresult: the remote results for the node
2472 @param vg_name: the configured VG name
2473 @type nimg: L{NodeImage}
2474 @param nimg: node image
2481 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2483 # checks vg existence and size > 20G
2484 vglist = nresult.get(constants.NV_VGLIST, None)
2486 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2488 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2489 constants.MIN_VG_SIZE)
2490 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2493 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2495 self._Error(constants.CV_ENODELVM, node, em)
2496 if pvminmax is not None:
2497 (nimg.pv_min, nimg.pv_max) = pvminmax
2499 def _VerifyGroupLVM(self, node_image, vg_name):
2500 """Check cross-node consistency in LVM.
2502 @type node_image: dict
2503 @param node_image: info about nodes, a mapping from node names to
2504 L{NodeImage} objects
2505 @param vg_name: the configured VG name
2511 # Only exclusive storage needs this kind of check
2512 if not self._exclusive_storage:
2515 # exclusive_storage wants all PVs to have the same size (approximately);
2516 # if the smallest and the biggest ones are okay, everything is fine.
2517 # pv_min is None iff pv_max is None
2518 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2521 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2522 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2523 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2524 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2525 "PV sizes differ too much in the group; smallest (%s MB) is"
2526 " on %s, biggest (%s MB) is on %s",
2527 pvmin, minnode, pvmax, maxnode)
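
  # The group-wide rule above: with exclusive storage, the spread between
  # the smallest and the biggest PV in the group must stay within a
  # tolerance (the real test is utils.LvmExclusiveTestBadPvSizes). A
  # hedged sketch with an illustrative relative tolerance:
  @staticmethod
  def _ExamplePvSpreadIsBad(per_node, tolerance=0.1):
    # per_node: node name -> (pv_min, pv_max) in MiB, Nones filtered out
    if not per_node:
      return False
    pvmin = min(lo for (lo, _) in per_node.values())
    pvmax = max(hi for (_, hi) in per_node.values())
    return (pvmax - pvmin) > tolerance * pvmin
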
2529 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2530 """Check the node bridges.
2532 @type ninfo: L{objects.Node}
2533 @param ninfo: the node to check
2534 @param nresult: the remote results for the node
2535 @param bridges: the expected list of bridges
2542 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2544 missing = nresult.get(constants.NV_BRIDGES, None)
2545 test = not isinstance(missing, list)
2546 _ErrorIf(test, constants.CV_ENODENET, node,
2547 "did not return valid bridge information")
2549 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2550 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2552 def _VerifyNodeUserScripts(self, ninfo, nresult):
2553 """Check the results of user scripts presence and executability on the node
2555 @type ninfo: L{objects.Node}
2556 @param ninfo: the node to check
2557 @param nresult: the remote results for the node
2562 test = not constants.NV_USERSCRIPTS in nresult
2563 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2564 "did not return user scripts information")
2566 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2568 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2569 "user scripts not present or not executable: %s" %
2570 utils.CommaJoin(sorted(broken_scripts)))
2572 def _VerifyNodeNetwork(self, ninfo, nresult):
2573 """Check the node network connectivity results.
2575 @type ninfo: L{objects.Node}
2576 @param ninfo: the node to check
2577 @param nresult: the remote results for the node
2581 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2583 test = constants.NV_NODELIST not in nresult
2584 _ErrorIf(test, constants.CV_ENODESSH, node,
2585 "node hasn't returned node ssh connectivity data")
2587 if nresult[constants.NV_NODELIST]:
2588 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2589 _ErrorIf(True, constants.CV_ENODESSH, node,
2590 "ssh communication with node '%s': %s", a_node, a_msg)
2592 test = constants.NV_NODENETTEST not in nresult
2593 _ErrorIf(test, constants.CV_ENODENET, node,
2594 "node hasn't returned node tcp connectivity data")
2596 if nresult[constants.NV_NODENETTEST]:
2597 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2599 _ErrorIf(True, constants.CV_ENODENET, node,
2600 "tcp communication with node '%s': %s",
2601 anode, nresult[constants.NV_NODENETTEST][anode])
2603 test = constants.NV_MASTERIP not in nresult
2604 _ErrorIf(test, constants.CV_ENODENET, node,
2605 "node hasn't returned node master IP reachability data")
2607 if not nresult[constants.NV_MASTERIP]:
2608 if node == self.master_node:
2609 msg = "the master node cannot reach the master IP (not configured?)"
2611 msg = "cannot reach the master IP"
2612 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2614 def _VerifyInstance(self, instance, inst_config, node_image,
2616 """Verify an instance.
2618 This function checks whether the required block devices are
2619 available on the instance's node, and that the nodes are in the correct
2623 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2624 pnode = inst_config.primary_node
2625 pnode_img = node_image[pnode]
2626 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2628 node_vol_should = {}
2629 inst_config.MapLVsByNode(node_vol_should)
2631 cluster = self.cfg.GetClusterInfo()
2632 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2634 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2635 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2636 code=self.ETYPE_WARNING)
2638 for node in node_vol_should:
2639 n_img = node_image[node]
2640 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2641 # ignore missing volumes on offline or broken nodes
2643 for volume in node_vol_should[node]:
2644 test = volume not in n_img.volumes
2645 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2646 "volume %s missing on node %s", volume, node)
2648 if inst_config.admin_state == constants.ADMINST_UP:
2649 test = instance not in pnode_img.instances and not pnode_img.offline
2650 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2651 "instance not running on its primary node %s",
2653 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2654 "instance is marked as running and lives on offline node %s",
2657 diskdata = [(nname, success, status, idx)
2658 for (nname, disks) in diskstatus.items()
2659 for idx, (success, status) in enumerate(disks)]
2661 for nname, success, bdev_status, idx in diskdata:
2662 # the 'ghost node' construction in Exec() ensures that we have a
2664 snode = node_image[nname]
2665 bad_snode = snode.ghost or snode.offline
2666 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2667 not success and not bad_snode,
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "couldn't retrieve status for disk/%s on %s: %s",
2670 idx, nname, bdev_status)
2671 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2672 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2673 constants.CV_EINSTANCEFAULTYDISK, instance,
2674 "disk/%s on %s is faulty", idx, nname)
2676 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2677 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2678 " primary node failed", instance)
2680 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2681 constants.CV_EINSTANCELAYOUT,
2682 instance, "instance has multiple secondary nodes: %s",
2683 utils.CommaJoin(inst_config.secondary_nodes),
2684 code=self.ETYPE_WARNING)
2686 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2687 # Disk template not compatible with exclusive_storage: no instance
2688 # node should have the flag set
2689 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2690 inst_config.all_nodes)
2691 es_nodes = [n for (n, es) in es_flags.items()
2693 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2694 "instance has template %s, which is not supported on nodes"
2695 " that have exclusive storage set: %s",
2696 inst_config.disk_template, utils.CommaJoin(es_nodes))
2698 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2699 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2700 instance_groups = {}
2702 for node in instance_nodes:
2703 instance_groups.setdefault(self.all_node_info[node].group,
2707 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2708 # Sort so that we always list the primary node first.
2709 for group, nodes in sorted(instance_groups.items(),
2710 key=lambda (_, nodes): pnode in nodes,
2713 self._ErrorIf(len(instance_groups) > 1,
2714 constants.CV_EINSTANCESPLITGROUPS,
2715 instance, "instance has primary and secondary nodes in"
2716 " different groups: %s", utils.CommaJoin(pretty_list),
2717 code=self.ETYPE_WARNING)
2719 inst_nodes_offline = []
2720 for snode in inst_config.secondary_nodes:
2721 s_img = node_image[snode]
2722 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2723 snode, "instance %s, connection to secondary node failed",
2727 inst_nodes_offline.append(snode)
2729 # warn that the instance lives on offline nodes
2730 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2731 "instance has offline secondary node(s) %s",
2732 utils.CommaJoin(inst_nodes_offline))
2733 # ... or ghost/non-vm_capable nodes
2734 for node in inst_config.all_nodes:
2735 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2736 instance, "instance lives on ghost node %s", node)
2737 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2738 instance, "instance lives on non-vm_capable node %s", node)
2740 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2741 """Verify if there are any unknown volumes in the cluster.
2743 The .os, .swap and backup volumes are ignored. All other volumes are
2744 reported as unknown.
2746 @type reserved: L{ganeti.utils.FieldSet}
2747 @param reserved: a FieldSet of reserved volume names
2750 for node, n_img in node_image.items():
2751 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2752 self.all_node_info[node].group != self.group_uuid):
2753 # skip non-healthy nodes
2755 for volume in n_img.volumes:
2756 test = ((node not in node_vol_should or
2757 volume not in node_vol_should[node]) and
2758 not reserved.Matches(volume))
2759 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2760 "volume %s is unknown", volume)
2762 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2763 """Verify N+1 Memory Resilience.
2765 Check that if one single node dies we can still start all the
2766 instances it was primary for.
2769 cluster_info = self.cfg.GetClusterInfo()
2770 for node, n_img in node_image.items():
2771 # This code checks that every node which is now listed as
2772 # secondary has enough memory to host all instances it is
2773 # supposed to, should a single other node in the cluster fail.
2774 # FIXME: not ready for failover to an arbitrary node
2775 # FIXME: does not support file-backed instances
2776 # WARNING: we currently take into account down instances as well
2777 # as up ones, considering that even if they're down someone
2778 # might want to start them even in the event of a node failure.
2779 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2780 # we're skipping nodes marked offline and nodes in other groups from
2781 # the N+1 warning, since most likely we don't have good memory
2782 # information from them; we already list instances living on such
2783 # nodes, and that's enough warning
2785 #TODO(dynmem): also consider ballooning out other instances
2786 for prinode, instances in n_img.sbp.items():
2788 for instance in instances:
2789 bep = cluster_info.FillBE(instance_cfg[instance])
2790 if bep[constants.BE_AUTO_BALANCE]:
2791 needed_mem += bep[constants.BE_MINMEM]
2792 test = n_img.mfree < needed_mem
2793 self._ErrorIf(test, constants.CV_ENODEN1, node,
2794 "not enough memory to accomodate instance failovers"
2795 " should node %s fail (%dMiB needed, %dMiB available)",
2796 prinode, needed_mem, n_img.mfree)
2799 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2800 (files_all, files_opt, files_mc, files_vm)):
2801 """Verifies file checksums collected from all nodes.
2803 @param errorif: Callback for reporting errors
2804 @param nodeinfo: List of L{objects.Node} objects
2805 @param master_node: Name of master node
2806 @param all_nvinfo: RPC results
2809 # Define functions determining which nodes to consider for a file
2812 (files_mc, lambda node: (node.master_candidate or
2813 node.name == master_node)),
2814 (files_vm, lambda node: node.vm_capable),
2817 # Build mapping from filename to list of nodes which should have the file
2819 for (files, fn) in files2nodefn:
2821 filenodes = nodeinfo
2823 filenodes = filter(fn, nodeinfo)
2824 nodefiles.update((filename,
2825 frozenset(map(operator.attrgetter("name"), filenodes)))
2826 for filename in files)
2828 assert set(nodefiles) == (files_all | files_mc | files_vm)
2830 fileinfo = dict((filename, {}) for filename in nodefiles)
2831 ignore_nodes = set()
2833 for node in nodeinfo:
2835 ignore_nodes.add(node.name)
2838 nresult = all_nvinfo[node.name]
2840 if nresult.fail_msg or not nresult.payload:
2843 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2844 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2845 for (key, value) in fingerprints.items())
2848 test = not (node_files and isinstance(node_files, dict))
2849 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2850 "Node did not return file checksum data")
2852 ignore_nodes.add(node.name)
2855 # Build per-checksum mapping from filename to nodes having it
2856 for (filename, checksum) in node_files.items():
2857 assert filename in nodefiles
2858 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2860 for (filename, checksums) in fileinfo.items():
2861 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2863 # Nodes having the file
2864 with_file = frozenset(node_name
2865 for nodes in fileinfo[filename].values()
2866 for node_name in nodes) - ignore_nodes
2868 expected_nodes = nodefiles[filename] - ignore_nodes
2870 # Nodes missing file
2871 missing_file = expected_nodes - with_file
2873 if filename in files_opt:
2875 errorif(missing_file and missing_file != expected_nodes,
2876 constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is optional, but it must exist on all or no"
2878 " nodes (not found on %s)",
2879 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2881 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2882 "File %s is missing from node(s) %s", filename,
2883 utils.CommaJoin(utils.NiceSort(missing_file)))
2885 # Warn if a node has a file it shouldn't
2886 unexpected = with_file - expected_nodes
2888 constants.CV_ECLUSTERFILECHECK, None,
2889 "File %s should not exist on node(s) %s",
2890 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2892 # See if there are multiple versions of the file
2893 test = len(checksums) > 1
2895 variants = ["variant %s on %s" %
2896 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2897 for (idx, (checksum, nodes)) in
2898 enumerate(sorted(checksums.items()))]
2902 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2903 "File %s found with %s different checksums (%s)",
2904 filename, len(checksums), "; ".join(variants))
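
  # Per file, the rule enforced above is: every expected node reports the
  # file (unless it is optional, where all-or-none applies) and all
  # reported checksums agree. A hedged plain-dict sketch of the core test:
  @staticmethod
  def _ExampleFileConsistency(expected_nodes, reported):
    # reported: node name -> checksum of one file on that node
    missing = set(expected_nodes) - set(reported)
    multiple_versions = len(set(reported.values())) > 1
    return (sorted(missing), multiple_versions)
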
2906 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2908 """Verifies and the node DRBD status.
2910 @type ninfo: L{objects.Node}
2911 @param ninfo: the node to check
2912 @param nresult: the remote results for the node
2913 @param instanceinfo: the dict of instances
2914 @param drbd_helper: the configured DRBD usermode helper
2915 @param drbd_map: the DRBD map as returned by
2916 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2920 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2923 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2924 test = (helper_result is None)
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "no drbd usermode helper returned")
2928 status, payload = helper_result
2930 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2931 "drbd usermode helper check unsuccessful: %s", payload)
2932 test = status and (payload != drbd_helper)
2933 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2934 "wrong drbd usermode helper: %s", payload)
2936 # compute the DRBD minors
2938 for minor, instance in drbd_map[node].items():
2939 test = instance not in instanceinfo
2940 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2941 "ghost instance '%s' in temporary DRBD map", instance)
2942 # ghost instance should not be running, but otherwise we
2943 # don't give double warnings (both ghost instance and
2944 # unallocated minor in use)
2946 node_drbd[minor] = (instance, False)
2948 instance = instanceinfo[instance]
2949 node_drbd[minor] = (instance.name,
2950 instance.admin_state == constants.ADMINST_UP)
2952 # and now check them
2953 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2954 test = not isinstance(used_minors, (tuple, list))
2955 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2956 "cannot parse drbd status file: %s", str(used_minors))
2958 # we cannot check drbd status
2961 for minor, (iname, must_exist) in node_drbd.items():
2962 test = minor not in used_minors and must_exist
2963 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2964 "drbd minor %d of instance %s is not active", minor, iname)
2965 for minor in used_minors:
2966 test = minor not in node_drbd
2967 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2968 "unallocated drbd minor %d is in use", minor)
2970 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2971 """Builds the node OS structures.
2973 @type ninfo: L{objects.Node}
2974 @param ninfo: the node to check
2975 @param nresult: the remote results for the node
2976 @param nimg: the node image object
2980 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2982 remote_os = nresult.get(constants.NV_OSLIST, None)
2983 test = (not isinstance(remote_os, list) or
2984 not compat.all(isinstance(v, list) and len(v) == 7
2985 for v in remote_os))
2987 _ErrorIf(test, constants.CV_ENODEOS, node,
2988 "node hasn't returned valid OS data")
2997 for (name, os_path, status, diagnose,
2998 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3000 if name not in os_dict:
3003 # parameters is a list of lists instead of list of tuples due to
3004 # JSON lacking a real tuple type; fix it:
3005 parameters = [tuple(v) for v in parameters]
3006 os_dict[name].append((os_path, status, diagnose,
3007 set(variants), set(parameters), set(api_ver)))
3009 nimg.oslist = os_dict
3011 def _VerifyNodeOS(self, ninfo, nimg, base):
3012 """Verifies the node OS list.
3014 @type ninfo: L{objects.Node}
3015 @param ninfo: the node to check
3016 @param nimg: the node image object
3017 @param base: the 'template' node we match against (e.g. from the master)
3021 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3023 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3025 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3026 for os_name, os_data in nimg.oslist.items():
3027 assert os_data, "Empty OS status for OS %s?!" % os_name
3028 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3029 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3030 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3031 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3032 "OS '%s' has multiple entries (first one shadows the rest): %s",
3033 os_name, utils.CommaJoin([v[0] for v in os_data]))
3034 # comparisons with the 'base' image
3035 test = os_name not in base.oslist
3036 _ErrorIf(test, constants.CV_ENODEOS, node,
3037 "Extra OS %s not present on reference node (%s)",
3041 assert base.oslist[os_name], "Base node has empty OS status?"
3042 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3044 # base OS is invalid, skipping
3046 for kind, a, b in [("API version", f_api, b_api),
3047 ("variants list", f_var, b_var),
3048 ("parameters", beautify_params(f_param),
3049 beautify_params(b_param))]:
3050 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3051 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3052 kind, os_name, base.name,
3053 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3055 # check any missing OSes
3056 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3057 _ErrorIf(missing, constants.CV_ENODEOS, node,
3058 "OSes present on reference node %s but missing on this node: %s",
3059 base.name, utils.CommaJoin(missing))
3061 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3062 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3064 @type ninfo: L{objects.Node}
3065 @param ninfo: the node to check
3066 @param nresult: the remote results for the node
3067 @type is_master: bool
3068 @param is_master: Whether node is the master node
3074 (constants.ENABLE_FILE_STORAGE or
3075 constants.ENABLE_SHARED_FILE_STORAGE)):
3077 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3079 # This should never happen
3080 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3081 "Node did not return forbidden file storage paths")
3083 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Found forbidden file storage paths: %s",
3085 utils.CommaJoin(fspaths))
3087 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3088 constants.CV_ENODEFILESTORAGEPATHS, node,
3089 "Node should not have returned forbidden file storage"
3092 def _VerifyOob(self, ninfo, nresult):
3093 """Verifies out of band functionality of a node.
3095 @type ninfo: L{objects.Node}
3096 @param ninfo: the node to check
3097 @param nresult: the remote results for the node
3101 # We just have to verify the paths on master and/or master candidates
3102 # as the oob helper is invoked on the master
3103 if ((ninfo.master_candidate or ninfo.master_capable) and
3104 constants.NV_OOB_PATHS in nresult):
3105 for path_result in nresult[constants.NV_OOB_PATHS]:
3106 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3108 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3109 """Verifies and updates the node volume data.
3111 This function will update a L{NodeImage}'s internal structures
3112 with data from the remote call.
3114 @type ninfo: L{objects.Node}
3115 @param ninfo: the node to check
3116 @param nresult: the remote results for the node
3117 @param nimg: the node image object
3118 @param vg_name: the configured VG name
3122 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3124 nimg.lvm_fail = True
3125 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3128 elif isinstance(lvdata, basestring):
3129 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3130 utils.SafeEncode(lvdata))
3131 elif not isinstance(lvdata, dict):
3132 _ErrorIf(True, constants.CV_ENODELVM, node,
3133 "rpc call to node failed (lvlist)")
3135 nimg.volumes = lvdata
3136 nimg.lvm_fail = False
3138 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3139 """Verifies and updates the node instance list.
3141 If the listing was successful, then updates this node's instance
3142 list. Otherwise, it marks the RPC call as failed for the instance
3145 @type ninfo: L{objects.Node}
3146 @param ninfo: the node to check
3147 @param nresult: the remote results for the node
3148 @param nimg: the node image object
3151 idata = nresult.get(constants.NV_INSTANCELIST, None)
3152 test = not isinstance(idata, list)
3153 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3154 "rpc call to node failed (instancelist): %s",
3155 utils.SafeEncode(str(idata)))
3157 nimg.hyp_fail = True
3159 nimg.instances = idata
3161 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3162 """Verifies and computes a node information map
3164 @type ninfo: L{objects.Node}
3165 @param ninfo: the node to check
3166 @param nresult: the remote results for the node
3167 @param nimg: the node image object
3168 @param vg_name: the configured VG name
3172 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3174 # try to read free memory (from the hypervisor)
3175 hv_info = nresult.get(constants.NV_HVINFO, None)
3176 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3177 _ErrorIf(test, constants.CV_ENODEHV, node,
3178 "rpc call to node failed (hvinfo)")
3181 nimg.mfree = int(hv_info["memory_free"])
3182 except (ValueError, TypeError):
3183 _ErrorIf(True, constants.CV_ENODERPC, node,
3184 "node returned invalid nodeinfo, check hypervisor")
3186 # FIXME: devise a free space model for file based instances as well
3187 if vg_name is not None:
3188 test = (constants.NV_VGLIST not in nresult or
3189 vg_name not in nresult[constants.NV_VGLIST])
3190 _ErrorIf(test, constants.CV_ENODELVM, node,
3191 "node didn't return data for the volume group '%s'"
3192 " - it is either missing or broken", vg_name)
3195 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3196 except (ValueError, TypeError):
3197 _ErrorIf(True, constants.CV_ENODERPC, node,
3198 "node returned invalid LVM info, check LVM status")
3200 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3201 """Gets per-disk status information for all instances.
3203 @type nodelist: list of strings
3204 @param nodelist: Node names
3205 @type node_image: dict of (name, L{NodeImage})
3206 @param node_image: Node objects
3207 @type instanceinfo: dict of (name, L{objects.Instance})
3208 @param instanceinfo: Instance objects
3209 @rtype: {instance: {node: [(success, payload)]}}
3210 @return: a dictionary of per-instance dictionaries with nodes as
3211 keys and disk information as values; the disk information is a
3212 list of tuples (success, payload)
3215 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3218 node_disks_devonly = {}
3219 diskless_instances = set()
3220 diskless = constants.DT_DISKLESS
3222 for nname in nodelist:
3223 node_instances = list(itertools.chain(node_image[nname].pinst,
3224 node_image[nname].sinst))
3225 diskless_instances.update(inst for inst in node_instances
3226 if instanceinfo[inst].disk_template == diskless)
3227 disks = [(inst, disk)
3228 for inst in node_instances
3229 for disk in instanceinfo[inst].disks]
3232 # No need to collect data
3235 node_disks[nname] = disks
3237 # _AnnotateDiskParams already makes copies of the disks
3239 for (inst, dev) in disks:
3240 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3241 self.cfg.SetDiskID(anno_disk, nname)
3242 devonly.append(anno_disk)
3244 node_disks_devonly[nname] = devonly
3246 assert len(node_disks) == len(node_disks_devonly)
3248 # Collect data from all nodes with disks
3249 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3252 assert len(result) == len(node_disks)
3256 for (nname, nres) in result.items():
3257 disks = node_disks[nname]
3260 # No data from this node
3261 data = len(disks) * [(False, "node offline")]
3264 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3265 "while getting disk information: %s", msg)
3267 # No data from this node
3268 data = len(disks) * [(False, msg)]
3271 for idx, i in enumerate(nres.payload):
3272 if isinstance(i, (tuple, list)) and len(i) == 2:
3275 logging.warning("Invalid result from node %s, entry %d: %s",
3277 data.append((False, "Invalid result from the remote node"))
3279 for ((inst, _), status) in zip(disks, data):
3280 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3282 # Add empty entries for diskless instances.
3283 for inst in diskless_instances:
3284 assert inst not in instdisk
3287 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3288 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3289 compat.all(isinstance(s, (tuple, list)) and
3290 len(s) == 2 for s in statuses)
3291 for inst, nnames in instdisk.items()
3292 for nname, statuses in nnames.items())
3294 instdisk_keys = set(instdisk)
3295 instanceinfo_keys = set(instanceinfo)
3296 assert instdisk_keys == instanceinfo_keys, \
3297 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3298 (instdisk_keys, instanceinfo_keys))
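
  # The shape documented in _CollectDiskInfo's @rtype is easy to build
  # from flat records. A hedged sketch (names illustrative):
  @staticmethod
  def _ExampleBuildInstdisk(records):
    # records: iterable of (node name, instance name, (success, payload))
    instdisk = {}
    for (nname, inst, status) in records:
      instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
    return instdisk
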
3303 def _SshNodeSelector(group_uuid, all_nodes):
3304 """Create endless iterators for all potential SSH check hosts.
3307 nodes = [node for node in all_nodes
3308 if (node.group != group_uuid and
3310 keyfunc = operator.attrgetter("group")
3312 return map(itertools.cycle,
3313 [sorted(map(operator.attrgetter("name"), names))
3314 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3318 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3319 """Choose which nodes should talk to which other nodes.
3321 We will make nodes contact all nodes in their group, and one node from
3324 @warning: This algorithm has a known issue if one node group is much
3325 smaller than others (e.g. just one node). In such a case all other
3326 nodes will talk to the single node.
3329 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3330 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3332 return (online_nodes,
3333 dict((name, sorted([i.next() for i in sel]))
3334 for name in online_nodes))
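
  # The two selector helpers above spread SSH checks: each online node
  # gets the next name from a cycling iterator per foreign group, so the
  # load rotates instead of hammering a single host. A hedged sketch
  # (itertools is imported at module level):
  @staticmethod
  def _ExampleSshTargets(online_nodes, foreign_groups):
    # foreign_groups: list of name lists, one per other node group
    cyclers = [itertools.cycle(sorted(names)) for names in foreign_groups]
    return dict((name, [c.next() for c in cyclers])
                for name in online_nodes)
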
3336 def BuildHooksEnv(self):
3339 Cluster-Verify hooks run only in the post phase; if they fail, their
3340 output is logged in the verify output and the verification fails.
3344 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3347 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3348 for node in self.my_node_info.values())
3352 def BuildHooksNodes(self):
3353 """Build hooks nodes.
3356 return ([], self.my_node_names)
3358 def Exec(self, feedback_fn):
3359 """Verify integrity of the node group, performing various test on nodes.
3362 # This method has too many local variables. pylint: disable=R0914
3363 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3365 if not self.my_node_names:
3367 feedback_fn("* Empty node group, skipping verification")
3371 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3372 verbose = self.op.verbose
3373 self._feedback_fn = feedback_fn
3375 vg_name = self.cfg.GetVGName()
3376 drbd_helper = self.cfg.GetDRBDHelper()
3377 cluster = self.cfg.GetClusterInfo()
3378 hypervisors = cluster.enabled_hypervisors
3379 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3381 i_non_redundant = [] # Non redundant instances
3382 i_non_a_balanced = [] # Non auto-balanced instances
3383 i_offline = 0 # Count of offline instances
3384 n_offline = 0 # Count of offline nodes
3385 n_drained = 0 # Count of nodes being drained
3386 node_vol_should = {}
3388 # FIXME: verify OS list
3391 filemap = _ComputeAncillaryFiles(cluster, False)
3393 # do local checksums
3394 master_node = self.master_node = self.cfg.GetMasterNode()
3395 master_ip = self.cfg.GetMasterIP()
3397 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3400 if self.cfg.GetUseExternalMipScript():
3401 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3403 node_verify_param = {
3404 constants.NV_FILELIST:
3405 map(vcluster.MakeVirtualPath,
3406 utils.UniqueSequence(filename
3407 for files in filemap
3408 for filename in files)),
3409 constants.NV_NODELIST:
3410 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3411 self.all_node_info.values()),
3412 constants.NV_HYPERVISOR: hypervisors,
3413 constants.NV_HVPARAMS:
3414 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3415 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3416 for node in node_data_list
3417 if not node.offline],
3418 constants.NV_INSTANCELIST: hypervisors,
3419 constants.NV_VERSION: None,
3420 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3421 constants.NV_NODESETUP: None,
3422 constants.NV_TIME: None,
3423 constants.NV_MASTERIP: (master_node, master_ip),
3424 constants.NV_OSLIST: None,
3425 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3426 constants.NV_USERSCRIPTS: user_scripts,
3429 if vg_name is not None:
3430 node_verify_param[constants.NV_VGLIST] = None
3431 node_verify_param[constants.NV_LVLIST] = vg_name
3432 node_verify_param[constants.NV_PVLIST] = [vg_name]
3435 node_verify_param[constants.NV_DRBDLIST] = None
3436 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3438 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3439 # Load file storage paths only from master node
3440 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3443 # FIXME: this needs to be changed per node-group, not cluster-wide
3445 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3446 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3447 bridges.add(default_nicpp[constants.NIC_LINK])
3448 for instance in self.my_inst_info.values():
3449 for nic in instance.nics:
3450 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3451 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3452 bridges.add(full_nic[constants.NIC_LINK])
3455 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3457 # Build our expected cluster state
3458 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3460 vm_capable=node.vm_capable))
3461 for node in node_data_list)
3465 for node in self.all_node_info.values():
3466 path = _SupportsOob(self.cfg, node)
3467 if path and path not in oob_paths:
3468 oob_paths.append(path)
3471 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3473 for instance in self.my_inst_names:
3474 inst_config = self.my_inst_info[instance]
3475 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3478 for nname in inst_config.all_nodes:
3479 if nname not in node_image:
3480 gnode = self.NodeImage(name=nname)
3481 gnode.ghost = (nname not in self.all_node_info)
3482 node_image[nname] = gnode
3484 inst_config.MapLVsByNode(node_vol_should)
3486 pnode = inst_config.primary_node
3487 node_image[pnode].pinst.append(instance)
3489 for snode in inst_config.secondary_nodes:
3490 nimg = node_image[snode]
3491 nimg.sinst.append(instance)
3492 if pnode not in nimg.sbp:
3493 nimg.sbp[pnode] = []
3494 nimg.sbp[pnode].append(instance)
3496 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3498 # The value of exclusive_storage should be the same across the group, so if
3500 # it's True for at least one node, we act as if it were set for all the nodes
3500 self._exclusive_storage = compat.any(es_flags.values())
3501 if self._exclusive_storage:
3502 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3503 es_unset_nodes = [n for (n, es) in es_flags.items()
3507 self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3508 "The exclusive_storage flag should be uniform in a group,"
3509 " but these nodes have it unset: %s",
3510 utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3511 self.LogWarning("Some checks required by exclusive storage will be"
3512 " performed also on nodes with the flag unset")
3514 # At this point, we have the in-memory data structures complete,
3515 # except for the runtime information, which we'll gather next
3517 # Due to the way our RPC system works, exact response times cannot be
3518 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3519 # time before and after executing the request, we can at least have a time
3521 nvinfo_starttime = time.time()
3522 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3524 self.cfg.GetClusterName())
3525 nvinfo_endtime = time.time()
3527 if self.extra_lv_nodes and vg_name is not None:
3529 self.rpc.call_node_verify(self.extra_lv_nodes,
3530 {constants.NV_LVLIST: vg_name},
3531 self.cfg.GetClusterName())
3533 extra_lv_nvinfo = {}
3535 all_drbd_map = self.cfg.ComputeDRBDMap()
3537 feedback_fn("* Gathering disk information (%s nodes)" %
3538 len(self.my_node_names))
3539 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3542 feedback_fn("* Verifying configuration file consistency")
3544 # If not all nodes are being checked, we need to make sure the master node
3545 # and a non-checked vm_capable node are in the list.
3546 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3548 vf_nvinfo = all_nvinfo.copy()
3549 vf_node_info = list(self.my_node_info.values())
3550 additional_nodes = []
3551 if master_node not in self.my_node_info:
3552 additional_nodes.append(master_node)
3553 vf_node_info.append(self.all_node_info[master_node])
3554 # Add the first vm_capable node we find which is not included,
3555 # excluding the master node (which we already have)
3556 for node in absent_nodes:
3557 nodeinfo = self.all_node_info[node]
3558 if (nodeinfo.vm_capable and not nodeinfo.offline and
3559 node != master_node):
3560 additional_nodes.append(node)
3561 vf_node_info.append(self.all_node_info[node])
3563 key = constants.NV_FILELIST
3564 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3565 {key: node_verify_param[key]},
3566 self.cfg.GetClusterName()))
3568 vf_nvinfo = all_nvinfo
3569 vf_node_info = self.my_node_info.values()
3571 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3573 feedback_fn("* Verifying node status")
3577 for node_i in node_data_list:
3579 nimg = node_image[node]
3583 feedback_fn("* Skipping offline node %s" % (node,))
3587 if node == master_node:
3589 elif node_i.master_candidate:
3590 ntype = "master candidate"
3591 elif node_i.drained:
3597 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3599 msg = all_nvinfo[node].fail_msg
3600 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3603 nimg.rpc_fail = True
3606 nresult = all_nvinfo[node].payload
3608 nimg.call_ok = self._VerifyNode(node_i, nresult)
3609 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3610 self._VerifyNodeNetwork(node_i, nresult)
3611 self._VerifyNodeUserScripts(node_i, nresult)
3612 self._VerifyOob(node_i, nresult)
3613 self._VerifyFileStoragePaths(node_i, nresult,
3614 node == master_node)
3617 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3618 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3621 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3622 self._UpdateNodeInstances(node_i, nresult, nimg)
3623 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3624 self._UpdateNodeOS(node_i, nresult, nimg)
3626 if not nimg.os_fail:
3627 if refos_img is None:
3629 self._VerifyNodeOS(node_i, nimg, refos_img)
3630 self._VerifyNodeBridges(node_i, nresult, bridges)
3632 # Check whether all running instances are primary for the node. (This
3633 # can no longer be done from _VerifyInstance below, since some of the
3634 # wrong instances could be from other node groups.)
3635 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3637 for inst in non_primary_inst:
3638 test = inst in self.all_inst_info
3639 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3640 "instance should not run on node %s", node_i.name)
3641 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3642 "node is running unknown instance %s", inst)
3644 self._VerifyGroupLVM(node_image, vg_name)
3646 for node, result in extra_lv_nvinfo.items():
3647 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3648 node_image[node], vg_name)
3650 feedback_fn("* Verifying instance status")
3651 for instance in self.my_inst_names:
3653 feedback_fn("* Verifying instance %s" % instance)
3654 inst_config = self.my_inst_info[instance]
3655 self._VerifyInstance(instance, inst_config, node_image,
3658 # If the instance is non-redundant we cannot survive losing its primary
3659 # node, so we are not N+1 compliant.
3660 if inst_config.disk_template not in constants.DTS_MIRRORED:
3661 i_non_redundant.append(instance)
3663 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3664 i_non_a_balanced.append(instance)
3666 feedback_fn("* Verifying orphan volumes")
3667 reserved = utils.FieldSet(*cluster.reserved_lvs)
3669 # We will get spurious "unknown volume" warnings if any node of this group
3670 # is secondary for an instance whose primary is in another group. To avoid
3671 # them, we find these instances and add their volumes to node_vol_should.
3672 for inst in self.all_inst_info.values():
3673 for secondary in inst.secondary_nodes:
3674 if (secondary in self.my_node_info
3675 and inst.name not in self.my_inst_info):
3676 inst.MapLVsByNode(node_vol_should)
3679 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3681 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3682 feedback_fn("* Verifying N+1 Memory redundancy")
3683 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3685 feedback_fn("* Other Notes")
3687 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3688 % len(i_non_redundant))
3690 if i_non_a_balanced:
3691 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3692 % len(i_non_a_balanced))
    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
3705 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3706 """Analyze the post-hooks' result
3708 This method analyses the hook result, handles it, and sends some
3709 nicely-formatted feedback back to the user.
3711 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3712 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3713 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
3715 @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
3720 # We only really run POST phase hooks, only for non-empty groups,
3721 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3726 # Used to change hooks' output to proper indentation
3727 feedback_fn("* Hooks Results")
3728 assert hooks_results, "invalid result from hooks"
3730 for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3734 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3735 "Communication failure in hooks execution: %s", msg)
3736 if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
3740 for script, hkr, output in res.payload:
3741 test = hkr == constants.HKR_FAIL
3742 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3743 "Script %s failed, output:", script)
            if test:
              output = self._HOOKS_INDENT_RE.sub(" ", output)
              feedback_fn("%s" % output)
              lu_result = False

    return lu_result
3752 class LUClusterVerifyDisks(NoHooksLU):
3753 """Verifies the cluster disks status.
3758 def ExpandNames(self):
3759 self.share_locks = _ShareAll()
3760 self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
3764 def Exec(self, feedback_fn):
3765 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3767 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3768 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3769 for group in group_names])
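

# Illustrative sketch (not part of the original module): the fan-out pattern
# used by LUClusterVerifyDisks above. One single-opcode job is built per node
# group and wrapped in ResultWithJobs; mcpu._ProcessResult then submits the
# jobs. The helper name below is hypothetical.
def _ExampleVerifyDisksFanOut(group_names):
  """Builds the per-group job list, one OpGroupVerifyDisks job per group."""
  return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                         for group in group_names])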
3772 class LUGroupVerifyDisks(NoHooksLU):
3773 """Verifies the status of all disks in a node group.
3778 def ExpandNames(self):
3779 # Raises errors.OpPrereqError on its own if group can't be found
3780 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3782 self.share_locks = _ShareAll()
3783 self.needed_locks = {
3784 locking.LEVEL_INSTANCE: [],
3785 locking.LEVEL_NODEGROUP: [],
3786 locking.LEVEL_NODE: [],
      # This opcode acquires all node locks in a group. LUClusterVerifyDisks
      # starts one instance of this opcode for every group, which means all
      # nodes will be locked for a short amount of time, so it's better to
      # acquire the node allocation lock as well.
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
3795 def DeclareLocks(self, level):
3796 if level == locking.LEVEL_INSTANCE:
3797 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3799 # Lock instances optimistically, needs verification once node and group
3800 # locks have been acquired
3801 self.needed_locks[locking.LEVEL_INSTANCE] = \
3802 self.cfg.GetNodeGroupInstances(self.group_uuid)
3804 elif level == locking.LEVEL_NODEGROUP:
3805 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3807 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3808 set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3816 elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
3819 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3820 self._LockInstancesNodes()
3822 # Lock all nodes in group to be verified
3823 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3824 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3825 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3827 def CheckPrereq(self):
3828 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3829 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3830 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3832 assert self.group_uuid in owned_groups
3834 # Check if locked instances are still correct
3835 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3837 # Get instance information
3838 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3840 # Check if node groups for locked instances are still correct
3841 _CheckInstancesNodeGroups(self.cfg, self.instances,
3842 owned_groups, owned_nodes, self.group_uuid)
3844 def Exec(self, feedback_fn):
3845 """Verify integrity of cluster disks.
3847 @rtype: tuple of three items
3848 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3857 nv_dict = _MapInstanceDisksToNodes(
3858 [inst for inst in self.instances.values()
3859 if inst.admin_state == constants.ADMINST_UP])
3862 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3863 set(self.cfg.GetVmCapableNodeList()))
3865 node_lvs = self.rpc.call_lv_list(nodes, [])
3867 for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3877 for lv_name, (_, _, lv_online) in node_res.payload.items():
3878 inst = nv_dict.pop((node, lv_name), None)
3879 if not (lv_online or inst is None):
3880 res_instances.add(inst)
3882 # any leftover items in nv_dict are missing LVs, let's arrange the data
3884 for key, inst in nv_dict.iteritems():
3885 res_missing.setdefault(inst, []).append(list(key))
3887 return (res_nodes, list(res_instances), res_missing)
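

# Hedged usage sketch (assumption, not original code): how a caller might
# consume the 3-tuple returned by LUGroupVerifyDisks.Exec. The helper name is
# hypothetical; the tuple layout follows the docstring above.
def _ExampleSummarizeVerifyDisks(result):
  """Logs a summary of (node_errors, offline_instances, missing_lvs)."""
  (node_errors, offline_instances, missing_lvs) = result
  for node, err in node_errors.items():
    logging.warning("Node %s could not be queried: %s", node, err)
  for inst in offline_instances:
    logging.info("Instance %s needs activate-disks", inst)
  for inst, lvs in missing_lvs.items():
    logging.warning("Instance %s is missing LVs: %s", inst, lvs)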
3890 class LUClusterRepairDiskSizes(NoHooksLU):
3891 """Verifies the cluster disks sizes.
3896 def ExpandNames(self):
3897 if self.op.instances:
3898 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3899 # Not getting the node allocation lock as only a specific set of
3900 # instances (and their nodes) is going to be acquired
3901 self.needed_locks = {
3902 locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
3905 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
3908 self.needed_locks = {
3909 locking.LEVEL_NODE_RES: locking.ALL_SET,
3910 locking.LEVEL_INSTANCE: locking.ALL_SET,
        # This opcode acquires the node locks for all instances
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
3916 self.share_locks = {
3917 locking.LEVEL_NODE_RES: 1,
3918 locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_ALLOC: 1,
      }
3922 def DeclareLocks(self, level):
3923 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3924 self._LockInstancesNodes(primary_only=True, level=level)
3926 def CheckPrereq(self):
3927 """Check prerequisites.
    This only checks the optional instance list against the existing names.

    """
3932 if self.wanted_names is None:
3933 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3935 self.wanted_instances = \
3936 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3938 def _EnsureChildSizes(self, disk):
3939 """Ensure children of the disk have the needed disk size.
    This is valid mainly for DRBD8 and fixes an issue where the
    children have a smaller disk size than the parent.
    @param disk: an L{ganeti.objects.Disk} object

    """
3947 if disk.dev_type == constants.LD_DRBD8:
3948 assert disk.children, "Empty children for DRBD8?"
3949 fchild = disk.children[0]
3950 mismatch = fchild.size < disk.size
3952 self.LogInfo("Child disk has size %d, parent %d, fixing",
3953 fchild.size, disk.size)
3954 fchild.size = disk.size
3956 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
3961 def Exec(self, feedback_fn):
3962 """Verify the size of cluster disks.
3965 # TODO: check child disks too
3966 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3969 pnode = instance.primary_node
3970 if pnode not in per_node_disks:
3971 per_node_disks[pnode] = []
3972 for idx, disk in enumerate(instance.disks):
3973 per_node_disks[pnode].append((instance, idx, disk))
3975 assert not (frozenset(per_node_disks.keys()) -
3976 self.owned_locks(locking.LEVEL_NODE_RES)), \
3977 "Not owning correct locks"
3978 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
3985 result = self.rpc.call_blockdev_getsize(node, newl)
3987 self.LogWarning("Failure in blockdev_getsize call to node"
3988 " %s, ignoring", node)
3990 if len(result.payload) != len(dskl):
3991 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3992 " result.payload=%s", node, len(dskl), result.payload)
3993 self.LogWarning("Invalid result from node %s, ignoring node results",
3996 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3998 self.LogWarning("Disk %d of instance %s did not return size"
3999 " information, ignoring", idx, instance.name)
4001 if not isinstance(size, (int, long)):
4002 self.LogWarning("Disk %d of instance %s did not return valid"
4003 " size information, ignoring", idx, instance.name)
4006 if size != disk.size:
4007 self.LogInfo("Disk %d of instance %s has mismatched size,"
4008 " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
4012 changed.append((instance.name, idx, size))
4013 if self._EnsureChildSizes(disk):
4014 self.cfg.Update(instance, feedback_fn)
            changed.append((instance.name, idx, disk.size))

    return changed
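

# Illustrative sketch (assumption): the DRBD8 child-size fix performed by
# _EnsureChildSizes above, demonstrated on plain objects.Disk instances; the
# sizes and the helper name are made up for the example.
def _ExampleChildSizeFix():
  """Shows a DRBD8 data child being grown to the parent's size."""
  data = objects.Disk(dev_type=constants.LD_LV, size=2048)
  meta = objects.Disk(dev_type=constants.LD_LV, size=128)
  drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=4096,
                      children=[data, meta])
  # mirrors the LU: only the first child (the data volume) is resized
  if drbd.children[0].size < drbd.size:
    drbd.children[0].size = drbd.size
  return drbd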
4019 class LUClusterRename(LogicalUnit):
4020 """Rename the cluster.
4023 HPATH = "cluster-rename"
4024 HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
4035 def BuildHooksNodes(self):
4036 """Build hooks nodes.
4039 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4041 def CheckPrereq(self):
4042 """Verify that the passed name is a valid one.
4045 hostname = netutils.GetHostname(name=self.op.name,
4046 family=self.cfg.GetPrimaryIPFamily())
4048 new_name = hostname.name
4049 self.ip = new_ip = hostname.ip
4050 old_name = self.cfg.GetClusterName()
4051 old_ip = self.cfg.GetMasterIP()
4052 if new_name == old_name and new_ip == old_ip:
4053 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4054 " cluster has changed",
4056 if new_ip != old_ip:
4057 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4058 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4059 " reachable on the network" %
4060 new_ip, errors.ECODE_NOTUNIQUE)
4062 self.op.name = new_name
4064 def Exec(self, feedback_fn):
4065 """Rename the cluster.
    clustername = self.op.name
    new_ip = self.ip
4071 # shutdown the master IP
4072 master_params = self.cfg.GetMasterNetworkParameters()
4073 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
4076 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
4104 def _ValidateNetmask(cfg, netmask):
4105 """Checks if a netmask is valid.
4107 @type cfg: L{config.ConfigWriter}
4108 @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
4114 ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4117 except errors.ProgrammerError:
4118 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4119 ip_family, errors.ECODE_INVAL)
4120 if not ipcls.ValidateNetmask(netmask):
4121 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4122 (netmask), errors.ECODE_INVAL)
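

# Hedged sketch (assumption): _ValidateNetmask deals in CIDR prefix lengths,
# so on an IPv4 cluster a master_netmask of 24 passes while 33 fails. The
# helper below is hypothetical and only restates the check above.
def _ExampleNetmaskIsValid(netmask, ip_family):
  """Returns whether netmask is a valid CIDR prefix for the IP family."""
  ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  return ipcls.ValidateNetmask(netmask)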
4125 class LUClusterSetParams(LogicalUnit):
4126 """Change the parameters of the cluster.
4129 HPATH = "cluster-modify"
4130 HTYPE = constants.HTYPE_CLUSTER
4133 def CheckArguments(self):
4137 if self.op.uid_pool:
4138 uidpool.CheckUidPool(self.op.uid_pool)
4140 if self.op.add_uids:
4141 uidpool.CheckUidPool(self.op.add_uids)
4143 if self.op.remove_uids:
4144 uidpool.CheckUidPool(self.op.remove_uids)
4146 if self.op.master_netmask is not None:
4147 _ValidateNetmask(self.cfg, self.op.master_netmask)
4149 if self.op.diskparams:
4150 for dt_params in self.op.diskparams.values():
4151 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)
4158 def ExpandNames(self):
4159 # FIXME: in the future maybe other cluster params won't require checking on
4160 # all nodes to be modified.
4161 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4162 # resource locks the right thing, shouldn't it be the BGL instead?
4163 self.needed_locks = {
4164 locking.LEVEL_NODE: locking.ALL_SET,
4165 locking.LEVEL_INSTANCE: locking.ALL_SET,
4166 locking.LEVEL_NODEGROUP: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
4169 self.share_locks = _ShareAll()
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
4180 def BuildHooksNodes(self):
4181 """Build hooks nodes.
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
4187 def CheckPrereq(self):
4188 """Check prerequisites.
4190 This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
4194 if self.op.vg_name is not None and not self.op.vg_name:
4195 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4196 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4197 " instances exist", errors.ECODE_INVAL)
4199 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4200 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4201 raise errors.OpPrereqError("Cannot disable drbd helper while"
4202 " drbd-based instances exist",
4205 node_list = self.owned_locks(locking.LEVEL_NODE)
    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
4224 if self.op.drbd_helper:
4225 # checks given drbd helper on all nodes
4226 helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
4236 node_helper = helpers[node].payload
4237 if node_helper != self.op.drbd_helper:
4238 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4239 (node, node_helper), errors.ECODE_ENVIRON)
4241 self.cluster = cluster = self.cfg.GetClusterInfo()
4242 # validate params changes
4243 if self.op.beparams:
4244 objects.UpgradeBeParams(self.op.beparams)
4245 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4246 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4248 if self.op.ndparams:
4249 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4250 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4252 # TODO: we need a more general way to handle resetting
4253 # cluster-level parameters to default values
4254 if self.new_ndparams["oob_program"] == "":
4255 self.new_ndparams["oob_program"] = \
4256 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4258 if self.op.hv_state:
4259 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4260 self.cluster.hv_state_static)
4261 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4262 for hv, values in new_hv_state.items())
4264 if self.op.disk_state:
4265 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4266 self.cluster.disk_state_static)
4267 self.new_disk_state = \
4268 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4269 for name, values in svalues.items()))
4270 for storage, svalues in new_disk_state.items())
    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
        new = _ComputeNewInstanceViolations(ipol,
                                            new_ipolicy, instances)
        if new:
          violations.update(new)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))
4294 if self.op.nicparams:
4295 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4296 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []
4300 # check all instances for consistency
4301 for instance in self.cfg.GetAllInstancesInfo().values():
4302 for nic_idx, nic in enumerate(instance.nics):
4303 params_copy = copy.deepcopy(nic.nicparams)
4304 params_filled = objects.FillDict(self.new_nicparams, params_copy)
          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
4309 except errors.ConfigurationError, err:
4310 nic_errors.append("Instance %s, nic/%d: %s" %
4311 (instance.name, nic_idx, err))
4313 # if we're moving instances to routed, check that they have an ip
4314 target_mode = params_filled[constants.NIC_MODE]
4315 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4316 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4317 " address" % (instance.name, nic_idx))
4319 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4320 "\n".join(nic_errors), errors.ECODE_INVAL)
4322 # hypervisor list/parameters
4323 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4324 if self.op.hvparams:
4325 for hv_name, hv_dict in self.op.hvparams.items():
4326 if hv_name not in self.new_hvparams:
4327 self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
4331 # disk template parameters
4332 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4333 if self.op.diskparams:
4334 for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
4340 # os hypervisor parameters
4341 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_dict is None:
              # Delete if it exists
              self.new_os_hvp[os_name].pop(hv_name, None)
            elif hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])
4374 # changes to the hypervisor list
4375 if self.op.enabled_hypervisors is not None:
4376 self.hv_list = self.op.enabled_hypervisors
4377 for hv in self.hv_list:
4378 # if the hypervisor doesn't already exist in the cluster
4379 # hvparams, we initialize it to empty, and then (in both
4380 # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4386 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
4390 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4391 # either the enabled list has changed, or the parameters have, validate
4392 for hv_name, hv_params in self.new_hvparams.items():
4393 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4394 (self.op.enabled_hypervisors and
4395 hv_name in self.op.enabled_hypervisors)):
4396 # either this is a new hypervisor, or its parameters have changed
4397 hv_class = hypervisor.GetHypervisorClass(hv_name)
4398 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4399 hv_class.CheckParameterSyntax(hv_params)
4400 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
4406 for hv_name, hv_params in os_hvp.items():
4407 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4408 # we need to fill in the new os_hvp on top of the actual hv_p
4409 cluster_defaults = self.new_hvparams.get(hv_name, {})
4410 new_osp = objects.FillDict(cluster_defaults, hv_params)
4411 hv_class = hypervisor.GetHypervisorClass(hv_name)
4412 hv_class.CheckParameterSyntax(new_osp)
4413 _CheckHVParams(self, node_list, hv_name, new_osp)
4415 if self.op.default_iallocator:
4416 alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
4419 if alloc_script is None:
4420 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4421 " specified" % self.op.default_iallocator,
4424 def Exec(self, feedback_fn):
4425 """Change the parameters of the cluster.
4428 if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
4437 if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
4446 if self.op.hvparams:
4447 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
4450 if self.op.enabled_hypervisors is not None:
4451 self.cluster.hvparams = self.new_hvparams
4452 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4453 if self.op.beparams:
4454 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4455 if self.op.nicparams:
4456 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
4459 if self.op.osparams:
4460 self.cluster.osparams = self.new_osp
4461 if self.op.ndparams:
4462 self.cluster.ndparams = self.new_ndparams
4463 if self.op.diskparams:
4464 self.cluster.diskparams = self.new_diskparams
4465 if self.op.hv_state:
4466 self.cluster.hv_state_static = self.new_hv_state
4467 if self.op.disk_state:
4468 self.cluster.disk_state_static = self.new_disk_state
4470 if self.op.candidate_pool_size is not None:
4471 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4472 # we need to update the pool size here, otherwise the save will fail
4473 _AdjustCandidatePool(self, [])
4475 if self.op.maintain_node_health is not None:
4476 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4477 feedback_fn("Note: CONFD was disabled at build time, node health"
4478 " maintenance is not useful (still enabling it)")
4479 self.cluster.maintain_node_health = self.op.maintain_node_health
4481 if self.op.prealloc_wipe_disks is not None:
4482 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4484 if self.op.add_uids is not None:
4485 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4487 if self.op.remove_uids is not None:
4488 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4490 if self.op.uid_pool is not None:
4491 self.cluster.uid_pool = self.op.uid_pool
4493 if self.op.default_iallocator is not None:
4494 self.cluster.default_iallocator = self.op.default_iallocator
4496 if self.op.reserved_lvs is not None:
4497 self.cluster.reserved_lvs = self.op.reserved_lvs
4499 if self.op.use_external_mip_script is not None:
4500 self.cluster.use_external_mip_script = self.op.use_external_mip_script
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
4519 if self.op.hidden_os:
4520 helper_os("hidden_os", self.op.hidden_os, "hidden")
4522 if self.op.blacklisted_os:
4523 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4525 if self.op.master_netdev:
4526 master_params = self.cfg.GetMasterNetworkParameters()
4527 ems = self.cfg.GetUseExternalMipScript()
4528 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4529 self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
4532 result.Raise("Could not disable the master ip")
4533 feedback_fn("Changing master_netdev from %s to %s" %
4534 (master_params.netdev, self.op.master_netdev))
4535 self.cluster.master_netdev = self.op.master_netdev
4537 if self.op.master_netmask:
4538 master_params = self.cfg.GetMasterNetworkParameters()
4539 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4540 result = self.rpc.call_node_change_master_netmask(master_params.name,
4541 master_params.netmask,
4542 self.op.master_netmask,
                                                         master_params.ip,
                                                         master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)
4549 self.cluster.master_netmask = self.op.master_netmask
4551 self.cfg.Update(self.cluster, feedback_fn)
4553 if self.op.master_netdev:
4554 master_params = self.cfg.GetMasterNetworkParameters()
4555 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4556 self.op.master_netdev)
4557 ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4566 def _UploadHelper(lu, nodes, fname):
4567 """Helper for uploading a file and showing warnings.
4570 if os.path.exists(fname):
4571 result = lu.rpc.call_upload_file(nodes, fname)
4572 for to_node, to_result in result.items():
4573 msg = to_result.fail_msg
4575 msg = ("Copy of file %s to node %s failed: %s" %
4576 (fname, to_node, msg))
4580 def _ComputeAncillaryFiles(cluster, redist):
4581 """Compute files external to Ganeti which need to be consistent.
4583 @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])
  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
4604 if cluster.modify_etc_hosts:
4605 files_all.add(pathutils.ETC_HOSTS)
4607 if cluster.use_external_mip_script:
4608 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4610 # Files which are optional, these must:
4611 # - be present in one other category as well
4612 # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])
4617 # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)
  # File storage
  if (not redist and
      (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4626 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4627 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
  # Files which should only be on VM-capable nodes
  files_vm = set(filename
                 for hv_name in cluster.enabled_hypervisors
                 for filename in
                 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
                   for hv_name in cluster.enabled_hypervisors
                   for filename in
                   hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4642 # Filenames in each category must be unique
4643 all_files_set = files_all | files_mc | files_vm
4644 assert (len(all_files_set) ==
4645 sum(map(len, [files_all, files_mc, files_vm]))), \
4646 "Found file listed in more than one file list"
4648 # Optional files must be present in one other category
4649 assert all_files_set.issuperset(files_opt), \
4650 "Optional file not in a different required list"
4652 # This one file should never ever be re-distributed via RPC
4653 assert not (redist and
4654 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4656 return (files_all, files_opt, files_mc, files_vm)
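

# Hedged sketch (illustrative only): the two invariants asserted by
# _ComputeAncillaryFiles, restated as a standalone check with a hypothetical
# name, usable on any candidate (files_all, files_opt, files_mc, files_vm).
def _ExampleCheckFileCategories(files_all, files_opt, files_mc, files_vm):
  """Returns True if the categories are disjoint and optionals are covered."""
  all_files_set = files_all | files_mc | files_vm
  disjoint = (len(all_files_set) ==
              sum(map(len, [files_all, files_mc, files_vm])))
  return disjoint and all_files_set.issuperset(files_opt)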
4659 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4660 """Distribute additional files which are part of the cluster configuration.
4662 ConfigWriter takes care of distributing the config and ssconf files, but
4663 there are more files which should be distributed to all nodes. This function
4664 makes sure those are copied.
4666 @param lu: calling logical unit
4667 @param additional_nodes: list of nodes not in the config to distribute to
4668 @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
4672 # Gather target nodes
4673 cluster = lu.cfg.GetClusterInfo()
4674 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4676 online_nodes = lu.cfg.GetOnlineNodeList()
4677 online_set = frozenset(online_nodes)
4678 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4680 if additional_nodes is not None:
4681 online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
4685 # Never distribute to master node
4686 for nodelist in [online_nodes, vm_nodes]:
4687 if master_info.name in nodelist:
4688 nodelist.remove(master_info.name)
4691 (files_all, _, files_mc, files_vm) = \
4692 _ComputeAncillaryFiles(cluster, True)
4694 # Never re-distribute configuration file from here
4695 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4696 pathutils.CLUSTER_CONF_FILE in files_vm)
4697 assert not files_mc, "Master candidates not handled in this function"
  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
4710 class LUClusterRedistConf(NoHooksLU):
4711 """Force the redistribution of cluster configuration.
  This is a very simple LU.

  """
  REQ_BGL = False
4718 def ExpandNames(self):
4719 self.needed_locks = {
4720 locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
4723 self.share_locks = _ShareAll()
4725 def Exec(self, feedback_fn):
4726 """Redistribute the configuration.
4729 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4730 _RedistributeAncillaryFiles(self)
4733 class LUClusterActivateMasterIp(NoHooksLU):
4734 """Activate the master IP on the master node.
4737 def Exec(self, feedback_fn):
4738 """Activate the master IP.
4741 master_params = self.cfg.GetMasterNetworkParameters()
4742 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
4745 result.Raise("Could not activate the master IP")
4748 class LUClusterDeactivateMasterIp(NoHooksLU):
4749 """Deactivate the master IP on the master node.
4752 def Exec(self, feedback_fn):
4753 """Deactivate the master IP.
4756 master_params = self.cfg.GetMasterNetworkParameters()
4757 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
4760 result.Raise("Could not deactivate the master IP")
4763 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4764 """Sleep and poll for an instance's disk to sync.
  if not instance.disks or disks is not None and not disks:
    return True
4770 disks = _ExpandCheckDisks(instance, disks)
4773 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4775 node = instance.primary_node
  for dev in disks:
    lu.cfg.SetDiskID(dev, node)
4780 # TODO: Convert to utils.Retry
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
4800 for i, mstat in enumerate(rstats):
4802 lu.LogWarning("Can't compute data for node %s/%s",
4803 node, disks[i].iv_name)
4806 cumul_degraded = (cumul_degraded or
4807 (mstat.is_degraded and mstat.sync_percent is None))
4808 if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
4811 rem_time = ("%s remaining (estimated)" %
4812 utils.FormatSeconds(mstat.estimated_time))
4813 max_time = mstat.estimated_time
4815 rem_time = "no time estimate"
4816 lu.LogInfo("- device %s: %5.2f%% done, %s",
4817 disks[i].iv_name, mstat.sync_percent, rem_time)
4819 # if we're done but degraded, let's do a few small retries, to
4820 # make sure we see a stable and not transient situation; therefore
4821 # we force restart of the loop
4822 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4823 logging.info("Degraded disks found, %d retries left", degr_retries)
4831 time.sleep(min(60, max_time))
4834 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4836 return not cumul_degraded
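

# Illustrative sketch (assumption): how the per-device progress line printed
# by _WaitForSync is derived from a mirror status record; the helper name is
# hypothetical.
def _ExampleFormatSyncLine(iv_name, sync_percent, estimated_time):
  """Formats one "- device ..." progress line as _WaitForSync does."""
  if estimated_time is not None:
    rem_time = ("%s remaining (estimated)" %
                utils.FormatSeconds(estimated_time))
  else:
    rem_time = "no time estimate"
  return "- device %s: %5.2f%% done, %s" % (iv_name, sync_percent, rem_time)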
4839 def _BlockdevFind(lu, node, dev, instance):
4840 """Wrapper around call_blockdev_find to annotate diskparams.
4842 @param lu: A reference to the lu object
4843 @param node: The node to call out
4844 @param dev: The device to find
4845 @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
4849 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4850 return lu.rpc.call_blockdev_find(node, disk)
4853 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4854 """Wrapper around L{_CheckDiskConsistencyInner}.
4857 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
4864 """Check that mirrors are not degraded.
4866 @attention: The device has to be annotated already.
4868 The ldisk parameter, if True, will change the test from the
4869 is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)

  return result
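

# Hedged sketch (assumption): the two health notions distinguished by the
# ldisk parameter above, extracted from a blockdev_find payload; the helper
# name is hypothetical.
def _ExampleDiskHealthy(payload, ldisk):
  """Mirrors the status test used by _CheckDiskConsistencyInner."""
  if ldisk:
    return payload.ldisk_status == constants.LDS_OKAY
  return not payload.is_degraded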
4900 class LUOobCommand(NoHooksLU):
4901 """Logical unit for OOB handling.
4905 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4907 def ExpandNames(self):
4908 """Gather locks we need.
4911 if self.op.node_names:
4912 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4913 lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET
4917 self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }
4921 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4923 if not self.op.node_names:
4924 # Acquire node allocation lock only if all nodes are affected
4925 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4927 def CheckPrereq(self):
4928 """Check prerequisites.
4931 - the node exists in the configuration
    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()
4940 assert self.op.power_delay >= 0.0
4942 if self.op.node_names:
4943 if (self.op.command in self._SKIP_MASTER and
4944 self.master_node in self.op.node_names):
4945 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4946 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4948 if master_oob_handler:
4949 additional_text = ("run '%s %s %s' if you want to operate on the"
4950 " master regardless") % (master_oob_handler,
4954 additional_text = "it does not support out-of-band operations"
4956 raise errors.OpPrereqError(("Operating on the master node %s is not"
4957 " allowed for %s; %s") %
4958 (self.master_node, self.op.command,
4959 additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)
4965 if self.op.command in self._SKIP_MASTER:
4966 assert self.master_node not in self.op.node_names
4968 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4970 raise errors.OpPrereqError("Node %s not found" % node_name,
4973 self.nodes.append(node)
4975 if (not self.op.ignore_status and
4976 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4977 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4978 " not marked offline") % node_name,
4981 def Exec(self, feedback_fn):
4982 """Execute OOB and return result if we expect any.
    master_node = self.master_node
    ret = []
4988 for idx, node in enumerate(utils.NiceSort(self.nodes,
4989 key=lambda node: node.name)):
4990 node_entry = [(constants.RS_NORMAL, node.name)]
4991 ret.append(node_entry)
4993 oob_program = _SupportsOob(self.cfg, node)
      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue
4999 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5000 self.op.command, oob_program, node.name)
5001 result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)
      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5007 node.name, result.fail_msg)
5008 node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
5018 # For health we should log important events
5019 for item, status in result.payload:
5020 if status in [constants.OOB_STATUS_WARNING,
5021 constants.OOB_STATUS_CRITICAL]:
5022 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5023 item, node.name, status)
          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
5028 node.powered = False
5029 elif self.op.command == constants.OOB_POWER_STATUS:
5030 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5031 if powered != node.powered:
5032 logging.warning(("Recorded power state (%s) of node '%s' does not"
5033 " match actual power state (%s)"), node.powered,
5036 # For configuration changing commands we should update the node
5037 if self.op.command in (constants.OOB_POWER_ON,
5038 constants.OOB_POWER_OFF):
5039 self.cfg.Update(node, feedback_fn)
5041 node_entry.append((constants.RS_NORMAL, result.payload))
5043 if (self.op.command == constants.OOB_POWER_ON and
5044 idx < len(self.nodes) - 1):
        time.sleep(self.op.power_delay)

    return ret
5049 def _CheckPayload(self, result):
5050 """Checks if the payload is valid.
5052 @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
5058 if not isinstance(result.payload, list):
5059 errs.append("command 'health' is expected to return a list but got %s" %
5060 type(result.payload))
      else:
        for item, status in result.payload:
5063 if status not in constants.OOB_STATUSES:
5064 errs.append("health item '%s' has invalid status '%s'" %
5067 if self.op.command == constants.OOB_POWER_STATUS:
5068 if not isinstance(result.payload, dict):
5069 errs.append("power-status is expected to return a dict but got %s" %
5070 type(result.payload))
5072 if self.op.command in [
5073 constants.OOB_POWER_ON,
5074 constants.OOB_POWER_OFF,
      constants.OOB_POWER_CYCLE,
      ]:
5077 if result.payload is not None:
5078 errs.append("%s is expected to not return payload but got '%s'" %
5079 (self.op.command, result.payload))
    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s"
                               % utils.CommaJoin(errs))
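

# Illustrative sketch (assumption): example payload shapes accepted by
# LUOobCommand._CheckPayload for each command class; the values are made up.
def _ExampleOobPayloads():
  """Returns sample OOB payloads keyed by command (illustrative only)."""
  return {
    constants.OOB_HEALTH: [("disk0", constants.OOB_STATUS_OK)],
    constants.OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True},
    constants.OOB_POWER_ON: None,
    constants.OOB_POWER_OFF: None,
    constants.OOB_POWER_CYCLE: None,
    }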
5086 class _OsQuery(_QueryBase):
5087 FIELDS = query.OS_FIELDS
5089 def ExpandNames(self, lu):
5090 # Lock all nodes in shared mode
5091 # Temporary removal of locks, should be reverted later
5092 # TODO: reintroduce locks when they are lighter-weight
5093 lu.needed_locks = {}
5094 #self.share_locks[locking.LEVEL_NODE] = 1
5095 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5097 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
5103 self.do_locking = self.use_locking
  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
5112 @param rlist: a map with node names as keys and OS objects as values
5115 @return: a dictionary with osnames as keys and as value another
5116 map, with nodes as keys and tuples of (path, status, diagnose,
5117 variants, parameters, api_versions) as values, eg::
5119 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5120 (/srv/..., False, "invalid api")],
5121 "node2": [(/srv/..., True, "", [], [])]}
    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
5127 # level), so that nodes with a non-responding node daemon don't
5128 # make all OSes invalid
5129 good_nodes = [node_name for node_name in rlist
5130 if not rlist[node_name].fail_msg]
5131 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
5134 for (name, path, status, diagnose, variants,
5135 params, api_versions) in nr.payload:
5136 if name not in all_os:
5137 # build a list of nodes for this os containing empty lists
5138 # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
5141 all_os[name][nname] = []
5142 # convert params from [name, help] to (name, help)
5143 params = [tuple(v) for v in params]
5144 all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))

    return all_os
5148 def _GetQueryData(self, lu):
5149 """Computes the list of nodes and their attributes.
5152 # Locking is not used
5153 assert not (compat.any(lu.glm.is_owned(level)
5154 for level in locking.LEVELS
5155 if level != locking.LEVEL_CLUSTER) or
5156 self.do_locking or self.use_locking)
5158 valid_nodes = [node.name
5159 for node in lu.cfg.GetAllNodesInfo().values()
5160 if not node.offline and node.vm_capable]
5161 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}
5166 for (os_name, os_data) in pol.items():
5167 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5168 hidden=(os_name in cluster.hidden_os),
5169 blacklisted=(os_name in cluster.blacklisted_os))
      variants = set()
      parameters = set()
      api_versions = set()
5175 for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break
5180 (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)
5192 info.variants = list(variants)
5193 info.parameters = list(parameters)
5194 info.api_versions = list(api_versions)
5196 data[os_name] = info
5198 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5203 class LUOsDiagnose(NoHooksLU):
5204 """Logical unit for OS diagnose/query.
  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)
5216 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5217 # respective field is not requested
5218 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5219 for fname in ["hidden", "blacklisted"]
5220 if fname not in fields]
5221 if "valid" not in fields:
5222 status_filter.append([qlang.OP_TRUE, "valid"])
    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
5236 def CheckArguments(self):
5237 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5238 self.op.output_fields, False)
5240 def ExpandNames(self):
5241 self.oq.ExpandNames(self)
5243 def Exec(self, feedback_fn):
5244 return self.oq.OldStyleQuery(self)
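

# Hedged sketch (assumption): what the legacy filter built by
# LUOsDiagnose._BuildFilter evaluates to for a name-only query with no name
# arguments; hidden, blacklisted and invalid OSes are all filtered out.
def _ExampleDefaultOsFilter():
  """Returns the default OS status filter (illustrative only)."""
  return [qlang.OP_AND,
          [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
          [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
          [qlang.OP_TRUE, "valid"]]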
5247 class _ExtStorageQuery(_QueryBase):
5248 FIELDS = query.EXTSTORAGE_FIELDS
5250 def ExpandNames(self, lu):
5251 # Lock all nodes in shared mode
5252 # Temporary removal of locks, should be reverted later
5253 # TODO: reintroduce locks when they are lighter-weight
5254 lu.needed_locks = {}
5255 #self.share_locks[locking.LEVEL_NODE] = 1
5256 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5258 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
5264 self.do_locking = self.use_locking
  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByProvider(rlist):
    """Remaps a per-node return list into a per-provider per-node dictionary
5273 @param rlist: a map with node names as keys and ExtStorage objects as values
5276 @return: a dictionary with extstorage providers as keys and as
5277 value another map, with nodes as keys and tuples of
5278 (path, status, diagnose, parameters) as values, eg::
5280 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5281 "node2": [(/srv/..., False, "missing file")]
5282 "node3": [(/srv/..., True, "", [])]
    """
    all_es = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all providers invalid
5290 good_nodes = [node_name for node_name in rlist
5291 if not rlist[node_name].fail_msg]
5292 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
5295 for (name, path, status, diagnose, params) in nr.payload:
5296 if name not in all_es:
          # build a list of nodes for this provider containing empty lists
          # for each node in node_list
          all_es[name] = {}
          for nname in good_nodes:
5301 all_es[name][nname] = []
5302 # convert params from [name, help] to (name, help)
5303 params = [tuple(v) for v in params]
        all_es[name][node_name].append((path, status, diagnose, params))

    return all_es
5307 def _GetQueryData(self, lu):
5308 """Computes the list of nodes and their attributes.
5311 # Locking is not used
5312 assert not (compat.any(lu.glm.is_owned(level)
5313 for level in locking.LEVELS
5314 if level != locking.LEVEL_CLUSTER) or
5315 self.do_locking or self.use_locking)
5317 valid_nodes = [node.name
5318 for node in lu.cfg.GetAllNodesInfo().values()
5319 if not node.offline and node.vm_capable]
5320 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
    data = {}

    nodegroup_list = lu.cfg.GetNodeGroupList()
5326 for (es_name, es_data) in pol.items():
5327 # For every provider compute the nodegroup validity.
5328 # To do this we need to check the validity of each node in es_data
5329 # and then construct the corresponding nodegroup dict:
5330 # { nodegroup1: status
5331 # nodegroup2: status
      # }
      ndgrp_data = {}
      for nodegroup in nodegroup_list:
5335 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5337 nodegroup_nodes = ndgrp.members
5338 nodegroup_name = ndgrp.name
        node_statuses = []

        for node in nodegroup_nodes:
5342 if node in valid_nodes:
5343 if es_data[node] != []:
5344 node_status = es_data[node][0][1]
5345 node_statuses.append(node_status)
            else:
              node_statuses.append(False)
5349 if False in node_statuses:
5350 ndgrp_data[nodegroup_name] = False
          else:
            ndgrp_data[nodegroup_name] = True
5354 # Compute the provider's parameters
      parameters = set()
      for idx, esl in enumerate(es_data.values()):
        valid = bool(esl and esl[0][1])
        if not valid:
          break

        node_params = esl[0][3]
        if idx == 0:
          # First entry
          parameters.update(node_params)
        else:
          # Filter out inconsistent values
          parameters.intersection_update(node_params)
5369 params = list(parameters)
5371 # Now fill all the info for this provider
5372 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
                                  nodegroup_status=ndgrp_data,
                                  parameters=params)
5376 data[es_name] = info
5378 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5383 class LUExtStorageDiagnose(NoHooksLU):
5384 """Logical unit for ExtStorage diagnose/query.
5389 def CheckArguments(self):
5390 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5391 self.op.output_fields, False)
5393 def ExpandNames(self):
5394 self.eq.ExpandNames(self)
5396 def Exec(self, feedback_fn):
5397 return self.eq.OldStyleQuery(self)
5400 class LUNodeRemove(LogicalUnit):
5401 """Logical unit for removing a node.
5404 HPATH = "node-remove"
5405 HTYPE = constants.HTYPE_NODE
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
5416 def BuildHooksNodes(self):
5417 """Build hooks nodes.
5419 This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
5423 all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
5428 return (all_nodes, all_nodes)
5430 def CheckPrereq(self):
5431 """Check prerequisites.
5434 - the node exists in the configuration
5435 - it does not have primary or secondary instances
5436 - it's not the master
    Any errors are signaled by raising errors.OpPrereqError.

    """
5441 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5442 node = self.cfg.GetNodeInfo(self.op.node_name)
5443 assert node is not None
5445 masternode = self.cfg.GetMasterNode()
5446 if node.name == masternode:
5447 raise errors.OpPrereqError("Node is the master node, failover to another"
5448 " node is required", errors.ECODE_INVAL)
5450 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5451 if node.name in instance.all_nodes:
5452 raise errors.OpPrereqError("Instance %s is still running on the node,"
5453 " please remove first" % instance_name,
5455 self.op.node_name = node.name
5458 def Exec(self, feedback_fn):
5459 """Removes the node from the cluster.
5463 logging.info("Stopping the node daemon and removing configs from node %s",
5466 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"
5471 # Promote nodes to master candidate as needed
5472 _AdjustCandidatePool(self, exceptions=[node.name])
5473 self.context.RemoveNode(node.name)
5475 # Run post hooks on the node before it's removed
5476 _RunPostHook(self, node.name)
5478 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5479 msg = result.fail_msg
5481 self.LogWarning("Errors encountered on the remote node while leaving"
5482 " the cluster: %s", msg)
5484 # Remove node from our /etc/hosts
5485 if self.cfg.GetClusterInfo().modify_etc_hosts:
5486 master_node = self.cfg.GetMasterNode()
5487 result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
5490 result.Raise("Can't update hosts file with new host data")
5491 _RedistributeAncillaryFiles(self)
5494 class _NodeQuery(_QueryBase):
5495 FIELDS = query.NODE_FIELDS
5497 def ExpandNames(self, lu):
5498 lu.needed_locks = {}
5499 lu.share_locks = _ShareAll()
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
5506 self.do_locking = (self.use_locking and
5507 query.NQ_LIVE in self.requested_data)
    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
5511 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5512 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
  def DeclareLocks(self, lu, level):
    pass
5517 def _GetQueryData(self, lu):
5518 """Computes the list of nodes and their attributes.
5521 all_info = lu.cfg.GetAllNodesInfo()
5523 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5525 # Gather data as requested
5526 if query.NQ_LIVE in self.requested_data:
5527 # filter out non-vm_capable nodes
5528 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5530 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5531 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5532 [lu.cfg.GetHypervisorType()], es_flags)
5533 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5534 for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None
5539 if query.NQ_INST in self.requested_data:
5540 node_to_primary = dict([(name, set()) for name in nodenames])
5541 node_to_secondary = dict([(name, set()) for name in nodenames])
5543 inst_data = lu.cfg.GetAllInstancesInfo()
5545 for inst in inst_data.values():
5546 if inst.primary_node in node_to_primary:
5547 node_to_primary[inst.primary_node].add(inst.name)
5548 for secnode in inst.secondary_nodes:
5549 if secnode in node_to_secondary:
5550 node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
5553 node_to_secondary = None
5555 if query.NQ_OOB in self.requested_data:
5556 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None
5561 if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}
5566 return query.NodeQueryData([all_info[name] for name in nodenames],
5567 live_data, lu.cfg.GetMasterNode(),
5568 node_to_primary, node_to_secondary, groups,
5569 oob_support, lu.cfg.GetClusterInfo())
5572 class LUNodeQuery(NoHooksLU):
5573 """Logical unit for querying nodes.
  # pylint: disable=W0142
  REQ_BGL = False
5579 def CheckArguments(self):
5580 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5581 self.op.output_fields, self.op.use_locking)
5583 def ExpandNames(self):
5584 self.nq.ExpandNames(self)
5586 def DeclareLocks(self, level):
5587 self.nq.DeclareLocks(self, level)
5589 def Exec(self, feedback_fn):
5590 return self.nq.OldStyleQuery(self)
5593 class LUNodeQueryvols(NoHooksLU):
5594 """Logical unit for getting volumes on node(s).
5598 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5599 _FIELDS_STATIC = utils.FieldSet("node")
5601 def CheckArguments(self):
5602 _CheckOutputFields(static=self._FIELDS_STATIC,
5603 dynamic=self._FIELDS_DYNAMIC,
5604 selected=self.op.output_fields)
5606 def ExpandNames(self):
5607 self.share_locks = _ShareAll()
    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
5614 self.needed_locks = {
5615 locking.LEVEL_NODE: locking.ALL_SET,
5616 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5619 def Exec(self, feedback_fn):
5620 """Computes the list of nodes and their attributes.
5623 nodenames = self.owned_locks(locking.LEVEL_NODE)
5624 volumes = self.rpc.call_node_volumes(nodenames)
5626 ilist = self.cfg.GetAllInstancesInfo()
5627 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5630 for node in nodenames:
5631 nresult = volumes[node]
5634 msg = nresult.fail_msg
5636 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5639 node_vols = sorted(nresult.payload,
5640 key=operator.itemgetter("dev"))
5642 for vol in node_vols:
5644 for field in self.op.output_fields:
5647 elif field == "phys":
5651 elif field == "name":
5653 elif field == "size":
5654 val = int(float(vol["size"]))
5655 elif field == "instance":
5656 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5657 else:
5658 raise errors.ParameterError(field)
5659 node_output.append(str(val))
5661 output.append(node_output)
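# Illustrative output row (made-up values) for output_fields
# ["node", "vg", "name", "size", "instance"]:
#
#   ["node1", "xenvg", "inst1-disk0", "10240", "inst1.example.com"]
#
# i.e. one list of stringified field values per volume, per node.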
5666 class LUNodeQueryStorage(NoHooksLU):
5667 """Logical unit for getting information on storage units on node(s).
5670 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5673 def CheckArguments(self):
5674 _CheckOutputFields(static=self._FIELDS_STATIC,
5675 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5676 selected=self.op.output_fields)
5678 def ExpandNames(self):
5679 self.share_locks = _ShareAll()
5682 self.needed_locks = {
5683 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5686 self.needed_locks = {
5687 locking.LEVEL_NODE: locking.ALL_SET,
5688 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5691 def Exec(self, feedback_fn):
5692 """Computes the list of nodes and their attributes.
5695 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5697 # Always get name to sort by
5698 if constants.SF_NAME in self.op.output_fields:
5699 fields = self.op.output_fields[:]
5700 else:
5701 fields = [constants.SF_NAME] + self.op.output_fields
5703 # Never ask for node or type as it's only known to the LU
5704 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5705 while extra in fields:
5706 fields.remove(extra)
5708 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5709 name_idx = field_idx[constants.SF_NAME]
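# e.g. (illustrative) fields == ["name", "size", "used", "free"] yields
# field_idx == {"name": 0, "size": 1, "used": 2, "free": 3}, so name_idx
# locates the sort key in each result row.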
5711 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5712 data = self.rpc.call_storage_list(self.nodes,
5713 self.op.storage_type, st_args,
5714 self.op.name, fields)
5718 for node in utils.NiceSort(self.nodes):
5719 nresult = data[node]
5723 msg = nresult.fail_msg
5725 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5728 rows = dict([(row[name_idx], row) for row in nresult.payload])
5730 for name in utils.NiceSort(rows.keys()):
5735 for field in self.op.output_fields:
5736 if field == constants.SF_NODE:
5737 val = node
5738 elif field == constants.SF_TYPE:
5739 val = self.op.storage_type
5740 elif field in field_idx:
5741 val = row[field_idx[field]]
5742 else:
5743 raise errors.ParameterError(field)
5752 class _InstanceQuery(_QueryBase):
5753 FIELDS = query.INSTANCE_FIELDS
5755 def ExpandNames(self, lu):
5756 lu.needed_locks = {}
5757 lu.share_locks = _ShareAll()
5759 if self.names:
5760 self.wanted = _GetWantedInstances(lu, self.names)
5761 else:
5762 self.wanted = locking.ALL_SET
5764 self.do_locking = (self.use_locking and
5765 query.IQ_LIVE in self.requested_data)
5767 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5768 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5769 lu.needed_locks[locking.LEVEL_NODE] = []
5770 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5772 self.do_grouplocks = (self.do_locking and
5773 query.IQ_NODES in self.requested_data)
5775 def DeclareLocks(self, lu, level):
5777 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5778 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5780 # Lock all groups used by instances optimistically; this requires going
5781 # via the node before it's locked, requiring verification later on
5782 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5783 set(group_uuid
5784 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5785 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5786 elif level == locking.LEVEL_NODE:
5787 lu._LockInstancesNodes() # pylint: disable=W0212
5790 def _CheckGroupLocks(lu):
5791 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5792 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5794 # Check if node groups for locked instances are still correct
5795 for instance_name in owned_instances:
5796 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5798 def _GetQueryData(self, lu):
5799 """Computes the list of instances and their attributes.
5802 if self.do_grouplocks:
5803 self._CheckGroupLocks(lu)
5805 cluster = lu.cfg.GetClusterInfo()
5806 all_info = lu.cfg.GetAllInstancesInfo()
5808 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5810 instance_list = [all_info[name] for name in instance_names]
5811 nodes = frozenset(itertools.chain(*(inst.all_nodes
5812 for inst in instance_list)))
5813 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5816 wrongnode_inst = set()
5818 # Gather data as requested
5819 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5821 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5823 result = node_data[name]
5825 # offline nodes will be in both lists
5826 assert result.fail_msg
5827 offline_nodes.append(name)
5829 bad_nodes.append(name)
5830 elif result.payload:
5831 for inst in result.payload:
5832 if inst in all_info:
5833 if all_info[inst].primary_node == name:
5834 live_data.update(result.payload)
5836 wrongnode_inst.add(inst)
5838 # orphan instance; we don't list it here as we don't
5839 # handle this case yet in the output of instance listing
5840 logging.warning("Orphan instance '%s' found on node %s",
5842 # else no instance is alive
5846 if query.IQ_DISKUSAGE in self.requested_data:
5847 gmi = ganeti.masterd.instance
5848 disk_usage = dict((inst.name,
5849 gmi.ComputeDiskSize(inst.disk_template,
5850 [{constants.IDISK_SIZE: disk.size}
5851 for disk in inst.disks]))
5852 for inst in instance_list)
5856 if query.IQ_CONSOLE in self.requested_data:
5858 for inst in instance_list:
5859 if inst.name in live_data:
5860 # Instance is running
5861 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5862 else:
5863 consinfo[inst.name] = None
5864 assert set(consinfo.keys()) == set(instance_names)
5868 if query.IQ_NODES in self.requested_data:
5869 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5871 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5872 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5873 for uuid in set(map(operator.attrgetter("group"),
5879 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5880 disk_usage, offline_nodes, bad_nodes,
5881 live_data, wrongnode_inst, consinfo,
5885 class LUQuery(NoHooksLU):
5886 """Query for resources/items of a certain kind.
5889 # pylint: disable=W0142
5892 def CheckArguments(self):
5893 qcls = _GetQueryImplementation(self.op.what)
5895 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5897 def ExpandNames(self):
5898 self.impl.ExpandNames(self)
5900 def DeclareLocks(self, level):
5901 self.impl.DeclareLocks(self, level)
5903 def Exec(self, feedback_fn):
5904 return self.impl.NewStyleQuery(self)
5907 class LUQueryFields(NoHooksLU):
5908 """Query for resources/items of a certain kind.
5911 # pylint: disable=W0142
5914 def CheckArguments(self):
5915 self.qcls = _GetQueryImplementation(self.op.what)
5917 def ExpandNames(self):
5918 self.needed_locks = {}
5920 def Exec(self, feedback_fn):
5921 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
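# Illustrative sketch (hypothetical values): the opcode driving this LU for
# node fields, roughly what "gnt-node list-fields" submits:
#
#   op = opcodes.OpQueryFields(what=constants.QR_NODE, fields=None)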
5924 class LUNodeModifyStorage(NoHooksLU):
5925 """Logical unit for modifying a storage volume on a node.
5930 def CheckArguments(self):
5931 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5933 storage_type = self.op.storage_type
5936 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5938 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5939 " modified" % storage_type,
5942 diff = set(self.op.changes.keys()) - modifiable
5944 raise errors.OpPrereqError("The following fields can not be modified for"
5945 " storage units of type '%s': %r" %
5946 (storage_type, list(diff)),
5949 def ExpandNames(self):
5950 self.needed_locks = {
5951 locking.LEVEL_NODE: self.op.node_name,
5954 def Exec(self, feedback_fn):
5955 """Computes the list of nodes and their attributes.
5958 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5959 result = self.rpc.call_storage_modify(self.op.node_name,
5960 self.op.storage_type, st_args,
5961 self.op.name, self.op.changes)
5962 result.Raise("Failed to modify storage unit '%s' on %s" %
5963 (self.op.name, self.op.node_name))
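# Illustrative sketch (hypothetical values): marking an LVM physical volume
# as allocatable through this LU's opcode:
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sda3",
#                                    changes={constants.SF_ALLOCATABLE: True})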
5966 class LUNodeAdd(LogicalUnit):
5967 """Logical unit for adding node to the cluster.
5971 HTYPE = constants.HTYPE_NODE
5972 _NFLAGS = ["master_capable", "vm_capable"]
5974 def CheckArguments(self):
5975 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5976 # validate/normalize the node name
5977 self.hostname = netutils.GetHostname(name=self.op.node_name,
5978 family=self.primary_ip_family)
5979 self.op.node_name = self.hostname.name
5981 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5982 raise errors.OpPrereqError("Cannot readd the master node",
5985 if self.op.readd and self.op.group:
5986 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5987 " being readded", errors.ECODE_INVAL)
5989 def BuildHooksEnv(self):
5992 This will run on all nodes before, and on all nodes + the new node after.
5996 "OP_TARGET": self.op.node_name,
5997 "NODE_NAME": self.op.node_name,
5998 "NODE_PIP": self.op.primary_ip,
5999 "NODE_SIP": self.op.secondary_ip,
6000 "MASTER_CAPABLE": str(self.op.master_capable),
6001 "VM_CAPABLE": str(self.op.vm_capable),
6004 def BuildHooksNodes(self):
6005 """Build hooks nodes.
6008 # Exclude added node
6009 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6010 post_nodes = pre_nodes + [self.op.node_name, ]
6012 return (pre_nodes, post_nodes)
6014 def CheckPrereq(self):
6015 """Check prerequisites.
6018 - the new node is not already in the config
6020 - its parameters (single/dual homed) match the cluster
6022 Any errors are signaled by raising errors.OpPrereqError.
6025 cfg = self.cfg
6026 hostname = self.hostname
6027 node = hostname.name
6028 primary_ip = self.op.primary_ip = hostname.ip
6029 if self.op.secondary_ip is None:
6030 if self.primary_ip_family == netutils.IP6Address.family:
6031 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6032 " IPv4 address must be given as secondary",
6034 self.op.secondary_ip = primary_ip
6036 secondary_ip = self.op.secondary_ip
6037 if not netutils.IP4Address.IsValid(secondary_ip):
6038 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6039 " address" % secondary_ip, errors.ECODE_INVAL)
6041 node_list = cfg.GetNodeList()
6042 if not self.op.readd and node in node_list:
6043 raise errors.OpPrereqError("Node %s is already in the configuration" %
6044 node, errors.ECODE_EXISTS)
6045 elif self.op.readd and node not in node_list:
6046 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6049 self.changed_primary_ip = False
6051 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6052 if self.op.readd and node == existing_node_name:
6053 if existing_node.secondary_ip != secondary_ip:
6054 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6055 " address configuration as before",
6057 if existing_node.primary_ip != primary_ip:
6058 self.changed_primary_ip = True
6062 if (existing_node.primary_ip == primary_ip or
6063 existing_node.secondary_ip == primary_ip or
6064 existing_node.primary_ip == secondary_ip or
6065 existing_node.secondary_ip == secondary_ip):
6066 raise errors.OpPrereqError("New node ip address(es) conflict with"
6067 " existing node %s" % existing_node.name,
6068 errors.ECODE_NOTUNIQUE)
6070 # After this 'if' block, None is no longer a valid value for the
6071 # _capable op attributes
6073 old_node = self.cfg.GetNodeInfo(node)
6074 assert old_node is not None, "Can't retrieve locked node %s" % node
6075 for attr in self._NFLAGS:
6076 if getattr(self.op, attr) is None:
6077 setattr(self.op, attr, getattr(old_node, attr))
6079 for attr in self._NFLAGS:
6080 if getattr(self.op, attr) is None:
6081 setattr(self.op, attr, True)
6083 if self.op.readd and not self.op.vm_capable:
6084 pri, sec = cfg.GetNodeInstances(node)
6086 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6087 " flag set to false, but it already holds"
6088 " instances" % node,
6091 # check that the type of the node (single versus dual homed) is the
6092 # same as for the master
6093 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6094 master_singlehomed = myself.secondary_ip == myself.primary_ip
6095 newbie_singlehomed = secondary_ip == primary_ip
6096 if master_singlehomed != newbie_singlehomed:
6097 if master_singlehomed:
6098 raise errors.OpPrereqError("The master has no secondary ip but the"
6099 " new node has one",
6102 raise errors.OpPrereqError("The master has a secondary ip but the"
6103 " new node doesn't have one",
6106 # checks reachability
6107 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6108 raise errors.OpPrereqError("Node not reachable by ping",
6109 errors.ECODE_ENVIRON)
6111 if not newbie_singlehomed:
6112 # check reachability from my secondary ip to newbie's secondary ip
6113 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6114 source=myself.secondary_ip):
6115 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6116 " based ping to node daemon port",
6117 errors.ECODE_ENVIRON)
6124 if self.op.master_capable:
6125 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6126 else:
6127 self.master_candidate = False
6129 if self.op.readd:
6130 self.new_node = old_node
6131 else:
6132 node_group = cfg.LookupNodeGroup(self.op.group)
6133 self.new_node = objects.Node(name=node,
6134 primary_ip=primary_ip,
6135 secondary_ip=secondary_ip,
6136 master_candidate=self.master_candidate,
6137 offline=False, drained=False,
6138 group=node_group, ndparams={})
6140 if self.op.ndparams:
6141 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6143 if self.op.hv_state:
6144 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6146 if self.op.disk_state:
6147 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6149 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6150 # it a property on the base class.
6151 rpcrunner = rpc.DnsOnlyRunner()
6152 result = rpcrunner.call_version([node])[node]
6153 result.Raise("Can't get version information from node %s" % node)
6154 if constants.PROTOCOL_VERSION == result.payload:
6155 logging.info("Communication to node %s fine, sw version %s match",
6156 node, result.payload)
6158 raise errors.OpPrereqError("Version mismatch master version %s,"
6159 " node version %s" %
6160 (constants.PROTOCOL_VERSION, result.payload),
6161 errors.ECODE_ENVIRON)
6163 vg_name = cfg.GetVGName()
6164 if vg_name is not None:
6165 vparams = {constants.NV_PVLIST: [vg_name]}
6166 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6167 if self.op.ndparams:
6168 excl_stor = self.op.ndparams.get(constants.ND_EXCLUSIVE_STORAGE,
6170 cname = self.cfg.GetClusterName()
6171 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6172 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6174 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6175 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6177 def Exec(self, feedback_fn):
6178 """Adds the new node to the cluster.
6181 new_node = self.new_node
6182 node = new_node.name
6184 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6187 # We are adding a new node, so we assume it's powered
6188 new_node.powered = True
6190 # for re-adds, reset the offline/drained/master-candidate flags;
6191 # we need to reset here, otherwise offline would prevent RPC calls
6192 # later in the procedure; this also means that if the re-add
6193 # fails, we are left with a non-offlined, broken node
6195 new_node.drained = new_node.offline = False # pylint: disable=W0201
6196 self.LogInfo("Readding a node, the offline/drained flags were reset")
6197 # if we demote the node, we do cleanup later in the procedure
6198 new_node.master_candidate = self.master_candidate
6199 if self.changed_primary_ip:
6200 new_node.primary_ip = self.op.primary_ip
6202 # copy the master/vm_capable flags
6203 for attr in self._NFLAGS:
6204 setattr(new_node, attr, getattr(self.op, attr))
6206 # notify the user about any possible mc promotion
6207 if new_node.master_candidate:
6208 self.LogInfo("Node will be a master candidate")
6210 if self.op.ndparams:
6211 new_node.ndparams = self.op.ndparams
6212 else:
6213 new_node.ndparams = {}
6215 if self.op.hv_state:
6216 new_node.hv_state_static = self.new_hv_state
6218 if self.op.disk_state:
6219 new_node.disk_state_static = self.new_disk_state
6221 # Add node to our /etc/hosts, and add key to known_hosts
6222 if self.cfg.GetClusterInfo().modify_etc_hosts:
6223 master_node = self.cfg.GetMasterNode()
6224 result = self.rpc.call_etc_hosts_modify(master_node,
6225 constants.ETC_HOSTS_ADD, self.hostname.name, self.hostname.ip)
6228 result.Raise("Can't update hosts file with new host data")
6230 if new_node.secondary_ip != new_node.primary_ip:
6231 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6234 node_verify_list = [self.cfg.GetMasterNode()]
6235 node_verify_param = {
6236 constants.NV_NODELIST: ([node], {}),
6237 # TODO: do a node-net-test as well?
6240 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6241 self.cfg.GetClusterName())
6242 for verifier in node_verify_list:
6243 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6244 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6245 if nl_payload:
6246 for failed in nl_payload:
6247 feedback_fn("ssh/hostname verification failed"
6248 " (checking from %s): %s" %
6249 (verifier, nl_payload[failed]))
6250 raise errors.OpExecError("ssh/hostname verification failed")
6253 _RedistributeAncillaryFiles(self)
6254 self.context.ReaddNode(new_node)
6255 # make sure we redistribute the config
6256 self.cfg.Update(new_node, feedback_fn)
6257 # and make sure the new node will not have old files around
6258 if not new_node.master_candidate:
6259 result = self.rpc.call_node_demote_from_mc(new_node.name)
6260 msg = result.fail_msg
6262 self.LogWarning("Node failed to demote itself from master"
6263 " candidate status: %s" % msg)
6265 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6266 additional_vm=self.op.vm_capable)
6267 self.context.AddNode(new_node, self.proc.GetECId())
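# Rough summary of the add-node flow above (comment only, for orientation):
#   1. reset offline/drained on re-adds and copy the *_capable flags
#   2. update /etc/hosts on the master, then verify ssh/hostname
#      connectivity from the master to the new node
#   3. redistribute ancillary files and register the node in the
#      configuration via ReaddNode()/AddNode()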
6270 class LUNodeSetParams(LogicalUnit):
6271 """Modifies the parameters of a node.
6273 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6274 to the node role (as _ROLE_*)
6275 @cvar _R2F: a dictionary from node role to tuples of flags
6276 @cvar _FLAGS: a list of attribute names corresponding to the flags
6279 HPATH = "node-modify"
6280 HTYPE = constants.HTYPE_NODE
6282 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6284 (True, False, False): _ROLE_CANDIDATE,
6285 (False, True, False): _ROLE_DRAINED,
6286 (False, False, True): _ROLE_OFFLINE,
6287 (False, False, False): _ROLE_REGULAR,
6289 _R2F = dict((v, k) for k, v in _F2R.items())
6290 _FLAGS = ["master_candidate", "drained", "offline"]
6292 def CheckArguments(self):
6293 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6294 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6295 self.op.master_capable, self.op.vm_capable,
6296 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6298 if all_mods.count(None) == len(all_mods):
6299 raise errors.OpPrereqError("Please pass at least one modification",
6301 if all_mods.count(True) > 1:
6302 raise errors.OpPrereqError("Can't set the node into more than one"
6303 " state at the same time",
6306 # Boolean value that tells us whether we might be demoting from MC
6307 self.might_demote = (self.op.master_candidate is False or
6308 self.op.offline is True or
6309 self.op.drained is True or
6310 self.op.master_capable is False)
6312 if self.op.secondary_ip:
6313 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6314 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6315 " address" % self.op.secondary_ip,
6318 self.lock_all = self.op.auto_promote and self.might_demote
6319 self.lock_instances = self.op.secondary_ip is not None
6321 def _InstanceFilter(self, instance):
6322 """Filter for getting affected instances.
6325 return (instance.disk_template in constants.DTS_INT_MIRROR and
6326 self.op.node_name in instance.all_nodes)
6328 def ExpandNames(self):
6330 self.needed_locks = {
6331 locking.LEVEL_NODE: locking.ALL_SET,
6333 # Block allocations when all nodes are locked
6334 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6337 self.needed_locks = {
6338 locking.LEVEL_NODE: self.op.node_name,
6341 # Since modifying a node can have severe effects on currently running
6342 # operations, the resource lock is at least acquired in shared mode
6343 self.needed_locks[locking.LEVEL_NODE_RES] = \
6344 self.needed_locks[locking.LEVEL_NODE]
6346 # Get all locks except nodes in shared mode; they are not used for anything
6347 # but read-only access
6348 self.share_locks = _ShareAll()
6349 self.share_locks[locking.LEVEL_NODE] = 0
6350 self.share_locks[locking.LEVEL_NODE_RES] = 0
6351 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6353 if self.lock_instances:
6354 self.needed_locks[locking.LEVEL_INSTANCE] = \
6355 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6357 def BuildHooksEnv(self):
6360 This runs on the master node.
6364 "OP_TARGET": self.op.node_name,
6365 "MASTER_CANDIDATE": str(self.op.master_candidate),
6366 "OFFLINE": str(self.op.offline),
6367 "DRAINED": str(self.op.drained),
6368 "MASTER_CAPABLE": str(self.op.master_capable),
6369 "VM_CAPABLE": str(self.op.vm_capable),
6372 def BuildHooksNodes(self):
6373 """Build hooks nodes.
6376 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6379 def CheckPrereq(self):
6380 """Check prerequisites.
6382 This only checks the instance list against the existing names.
6385 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6387 if self.lock_instances:
6388 affected_instances = \
6389 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6391 # Verify instance locks
6392 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6393 wanted_instances = frozenset(affected_instances.keys())
6394 if wanted_instances - owned_instances:
6395 raise errors.OpPrereqError("Instances affected by changing node %s's"
6396 " secondary IP address have changed since"
6397 " locks were acquired, wanted '%s', have"
6398 " '%s'; retry the operation" %
6400 utils.CommaJoin(wanted_instances),
6401 utils.CommaJoin(owned_instances)),
6404 affected_instances = None
6406 if (self.op.master_candidate is not None or
6407 self.op.drained is not None or
6408 self.op.offline is not None):
6409 # we can't change the master's node flags
6410 if self.op.node_name == self.cfg.GetMasterNode():
6411 raise errors.OpPrereqError("The master role can be changed"
6412 " only via master-failover",
6415 if self.op.master_candidate and not node.master_capable:
6416 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6417 " it a master candidate" % node.name,
6420 if self.op.vm_capable is False:
6421 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6423 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6424 " the vm_capable flag" % node.name,
6427 if node.master_candidate and self.might_demote and not self.lock_all:
6428 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6429 # check if after removing the current node, we're missing master candidates
6431 (mc_remaining, mc_should, _) = \
6432 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6433 if mc_remaining < mc_should:
6434 raise errors.OpPrereqError("Not enough master candidates, please"
6435 " pass auto promote option to allow"
6436 " promotion (--auto-promote or RAPI"
6437 " auto_promote=True)", errors.ECODE_STATE)
6439 self.old_flags = old_flags = (node.master_candidate,
6440 node.drained, node.offline)
6441 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6442 self.old_role = old_role = self._F2R[old_flags]
6444 # Check for ineffective changes
6445 for attr in self._FLAGS:
6446 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6447 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6448 setattr(self.op, attr, None)
6450 # Past this point, any flag change to False means a transition
6451 # away from the respective state, as only real changes are kept
6453 # TODO: We might query the real power state if it supports OOB
6454 if _SupportsOob(self.cfg, node):
6455 if self.op.offline is False and not (node.powered or
6456 self.op.powered is True):
6457 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6458 " offline status can be reset") %
6459 self.op.node_name, errors.ECODE_STATE)
6460 elif self.op.powered is not None:
6461 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6462 " as it does not support out-of-band"
6463 " handling") % self.op.node_name,
6466 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
6467 if (self.op.drained is False or self.op.offline is False or
6468 (self.op.master_capable and not node.master_capable)):
6469 if _DecideSelfPromotion(self):
6470 self.op.master_candidate = True
6471 self.LogInfo("Auto-promoting node to master candidate")
6473 # If we're no longer master capable, we'll demote ourselves from MC
6474 if self.op.master_capable is False and node.master_candidate:
6475 self.LogInfo("Demoting from master candidate")
6476 self.op.master_candidate = False
6479 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6480 if self.op.master_candidate:
6481 new_role = self._ROLE_CANDIDATE
6482 elif self.op.drained:
6483 new_role = self._ROLE_DRAINED
6484 elif self.op.offline:
6485 new_role = self._ROLE_OFFLINE
6486 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6487 # False is still in new flags, which means we're un-setting (e.g. the offline state)
6489 new_role = self._ROLE_REGULAR
6490 else: # no new flags, nothing, keep old role
6493 self.new_role = new_role
6495 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6496 # Trying to transition out of offline status
6497 result = self.rpc.call_version([node.name])[node.name]
6499 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6500 " to report its version: %s" %
6501 (node.name, result.fail_msg),
6504 self.LogWarning("Transitioning node from offline to online state"
6505 " without using re-add. Please make sure the node"
6508 # When changing the secondary ip, verify if this is a single-homed to
6509 # multi-homed transition or vice versa, and apply the relevant checks.
6511 if self.op.secondary_ip:
6512 # Ok even without locking, because this can't be changed by any LU
6513 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6514 master_singlehomed = master.secondary_ip == master.primary_ip
6515 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6516 if self.op.force and node.name == master.name:
6517 self.LogWarning("Transitioning from single-homed to multi-homed"
6518 " cluster; all nodes will require a secondary IP"
6521 raise errors.OpPrereqError("Changing the secondary ip on a"
6522 " single-homed cluster requires the"
6523 " --force option to be passed, and the"
6524 " target node to be the master",
6526 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6527 if self.op.force and node.name == master.name:
6528 self.LogWarning("Transitioning from multi-homed to single-homed"
6529 " cluster; secondary IP addresses will have to be"
6532 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6533 " same as the primary IP on a multi-homed"
6534 " cluster, unless the --force option is"
6535 " passed, and the target node is the"
6536 " master", errors.ECODE_INVAL)
6538 assert not (frozenset(affected_instances) -
6539 self.owned_locks(locking.LEVEL_INSTANCE))
6542 if affected_instances:
6543 msg = ("Cannot change secondary IP address: offline node has"
6544 " instances (%s) configured to use it" %
6545 utils.CommaJoin(affected_instances.keys()))
6546 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6548 # On online nodes, check that no instances are running, and that
6549 # the node has the new ip and we can reach it.
6550 for instance in affected_instances.values():
6551 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6552 msg="cannot change secondary ip")
6554 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6555 if master.name != node.name:
6556 # check reachability from master secondary ip to new secondary ip
6557 if not netutils.TcpPing(self.op.secondary_ip,
6558 constants.DEFAULT_NODED_PORT,
6559 source=master.secondary_ip):
6560 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6561 " based ping to node daemon port",
6562 errors.ECODE_ENVIRON)
6564 if self.op.ndparams:
6565 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6566 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6567 self.new_ndparams = new_ndparams
6569 if self.op.hv_state:
6570 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6571 self.node.hv_state_static)
6573 if self.op.disk_state:
6574 self.new_disk_state = \
6575 _MergeAndVerifyDiskState(self.op.disk_state,
6576 self.node.disk_state_static)
6578 def Exec(self, feedback_fn):
6583 old_role = self.old_role
6584 new_role = self.new_role
6588 if self.op.ndparams:
6589 node.ndparams = self.new_ndparams
6591 if self.op.powered is not None:
6592 node.powered = self.op.powered
6594 if self.op.hv_state:
6595 node.hv_state_static = self.new_hv_state
6597 if self.op.disk_state:
6598 node.disk_state_static = self.new_disk_state
6600 for attr in ["master_capable", "vm_capable"]:
6601 val = getattr(self.op, attr)
6603 setattr(node, attr, val)
6604 result.append((attr, str(val)))
6606 if new_role != old_role:
6607 # Tell the node to demote itself, if no longer MC and not offline
6608 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6609 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6611 self.LogWarning("Node failed to demote itself: %s", msg)
6613 new_flags = self._R2F[new_role]
6614 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6616 result.append((desc, str(nf)))
6617 (node.master_candidate, node.drained, node.offline) = new_flags
6619 # we locked all nodes, we adjust the CP before updating this node
6621 _AdjustCandidatePool(self, [node.name])
6623 if self.op.secondary_ip:
6624 node.secondary_ip = self.op.secondary_ip
6625 result.append(("secondary_ip", self.op.secondary_ip))
6627 # this will trigger configuration file update, if needed
6628 self.cfg.Update(node, feedback_fn)
6630 # this will trigger job queue propagation or cleanup if the mc flag changed
6632 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6633 self.context.ReaddNode(node)
6638 class LUNodePowercycle(NoHooksLU):
6639 """Powercycles a node.
6644 def CheckArguments(self):
6645 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6646 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6647 raise errors.OpPrereqError("The node is the master and the force"
6648 " parameter was not set",
6651 def ExpandNames(self):
6652 """Locking for PowercycleNode.
6654 This is a last-resort option and shouldn't block on other
6655 jobs. Therefore, we grab no locks.
6658 self.needed_locks = {}
6660 def Exec(self, feedback_fn):
6664 result = self.rpc.call_node_powercycle(self.op.node_name,
6665 self.cfg.GetHypervisorType())
6666 result.Raise("Failed to schedule the reboot")
6667 return result.payload
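# Illustrative sketch (hypothetical node name): powercycling the master
# requires force, as checked in CheckArguments above:
#
#   op = opcodes.OpNodePowercycle(node_name="master.example.com", force=True)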
6670 class LUClusterQuery(NoHooksLU):
6671 """Query cluster configuration.
6676 def ExpandNames(self):
6677 self.needed_locks = {}
6679 def Exec(self, feedback_fn):
6680 """Return cluster config.
6683 cluster = self.cfg.GetClusterInfo()
6684 os_hvp = {}
6686 # Filter just for enabled hypervisors
6687 for os_name, hv_dict in cluster.os_hvp.items():
6688 os_hvp[os_name] = {}
6689 for hv_name, hv_params in hv_dict.items():
6690 if hv_name in cluster.enabled_hypervisors:
6691 os_hvp[os_name][hv_name] = hv_params
6693 # Convert ip_family to ip_version
6694 primary_ip_version = constants.IP4_VERSION
6695 if cluster.primary_ip_family == netutils.IP6Address.family:
6696 primary_ip_version = constants.IP6_VERSION
6699 "software_version": constants.RELEASE_VERSION,
6700 "protocol_version": constants.PROTOCOL_VERSION,
6701 "config_version": constants.CONFIG_VERSION,
6702 "os_api_version": max(constants.OS_API_VERSIONS),
6703 "export_version": constants.EXPORT_VERSION,
6704 "architecture": runtime.GetArchInfo(),
6705 "name": cluster.cluster_name,
6706 "master": cluster.master_node,
6707 "default_hypervisor": cluster.primary_hypervisor,
6708 "enabled_hypervisors": cluster.enabled_hypervisors,
6709 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6710 for hypervisor_name in cluster.enabled_hypervisors]),
6712 "beparams": cluster.beparams,
6713 "osparams": cluster.osparams,
6714 "ipolicy": cluster.ipolicy,
6715 "nicparams": cluster.nicparams,
6716 "ndparams": cluster.ndparams,
6717 "diskparams": cluster.diskparams,
6718 "candidate_pool_size": cluster.candidate_pool_size,
6719 "master_netdev": cluster.master_netdev,
6720 "master_netmask": cluster.master_netmask,
6721 "use_external_mip_script": cluster.use_external_mip_script,
6722 "volume_group_name": cluster.volume_group_name,
6723 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6724 "file_storage_dir": cluster.file_storage_dir,
6725 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6726 "maintain_node_health": cluster.maintain_node_health,
6727 "ctime": cluster.ctime,
6728 "mtime": cluster.mtime,
6729 "uuid": cluster.uuid,
6730 "tags": list(cluster.GetTags()),
6731 "uid_pool": cluster.uid_pool,
6732 "default_iallocator": cluster.default_iallocator,
6733 "reserved_lvs": cluster.reserved_lvs,
6734 "primary_ip_version": primary_ip_version,
6735 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6736 "hidden_os": cluster.hidden_os,
6737 "blacklisted_os": cluster.blacklisted_os,
6743 class LUClusterConfigQuery(NoHooksLU):
6744 """Return configuration values.
6749 def CheckArguments(self):
6750 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6752 def ExpandNames(self):
6753 self.cq.ExpandNames(self)
6755 def DeclareLocks(self, level):
6756 self.cq.DeclareLocks(self, level)
6758 def Exec(self, feedback_fn):
6759 result = self.cq.OldStyleQuery(self)
6761 assert len(result) == 1
6766 class _ClusterQuery(_QueryBase):
6767 FIELDS = query.CLUSTER_FIELDS
6769 #: Do not sort (there is only one item)
6772 def ExpandNames(self, lu):
6773 lu.needed_locks = {}
6775 # The following variables interact with _QueryBase._GetNames
6776 self.wanted = locking.ALL_SET
6777 self.do_locking = self.use_locking
6780 raise errors.OpPrereqError("Can not use locking for cluster queries",
6783 def DeclareLocks(self, lu, level):
6786 def _GetQueryData(self, lu):
6787 """Computes the list of nodes and their attributes.
6790 # Locking is not used
6791 assert not (compat.any(lu.glm.is_owned(level)
6792 for level in locking.LEVELS
6793 if level != locking.LEVEL_CLUSTER) or
6794 self.do_locking or self.use_locking)
6796 if query.CQ_CONFIG in self.requested_data:
6797 cluster = lu.cfg.GetClusterInfo()
6798 else:
6799 cluster = NotImplemented
6801 if query.CQ_QUEUE_DRAINED in self.requested_data:
6802 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6803 else:
6804 drain_flag = NotImplemented
6806 if query.CQ_WATCHER_PAUSE in self.requested_data:
6807 master_name = lu.cfg.GetMasterNode()
6809 result = lu.rpc.call_get_watcher_pause(master_name)
6810 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6813 watcher_pause = result.payload
6814 else:
6815 watcher_pause = NotImplemented
6817 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6820 class LUInstanceActivateDisks(NoHooksLU):
6821 """Bring up an instance's disks.
6826 def ExpandNames(self):
6827 self._ExpandAndLockInstance()
6828 self.needed_locks[locking.LEVEL_NODE] = []
6829 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6831 def DeclareLocks(self, level):
6832 if level == locking.LEVEL_NODE:
6833 self._LockInstancesNodes()
6835 def CheckPrereq(self):
6836 """Check prerequisites.
6838 This checks that the instance is in the cluster.
6841 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6842 assert self.instance is not None, \
6843 "Cannot retrieve locked instance %s" % self.op.instance_name
6844 _CheckNodeOnline(self, self.instance.primary_node)
6846 def Exec(self, feedback_fn):
6847 """Activate the disks.
6850 disks_ok, disks_info = \
6851 _AssembleInstanceDisks(self, self.instance,
6852 ignore_size=self.op.ignore_size)
6854 raise errors.OpExecError("Cannot activate block devices")
6856 if self.op.wait_for_sync:
6857 if not _WaitForSync(self, self.instance):
6858 raise errors.OpExecError("Some disks of the instance are degraded!")
6863 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6864 ignore_size=False):
6865 """Prepare the block devices for an instance.
6867 This sets up the block devices on all nodes.
6869 @type lu: L{LogicalUnit}
6870 @param lu: the logical unit on whose behalf we execute
6871 @type instance: L{objects.Instance}
6872 @param instance: the instance for whose disks we assemble
6873 @type disks: list of L{objects.Disk} or None
6874 @param disks: which disks to assemble (or all, if None)
6875 @type ignore_secondaries: boolean
6876 @param ignore_secondaries: if true, errors on secondary nodes
6877 won't result in an error return from the function
6878 @type ignore_size: boolean
6879 @param ignore_size: if true, the current known size of the disk
6880 will not be used during the disk activation, useful for cases
6881 when the size is wrong
6882 @return: a pair (disks_ok, device_info); disks_ok is False if the
6883 operation failed, and device_info is a list of tuples
6884 (host, instance_visible_name, node_visible_name) mapping node devices to instance devices
6889 iname = instance.name
6890 disks = _ExpandCheckDisks(instance, disks)
6892 # With the two passes mechanism we try to reduce the window of
6893 # opportunity for the race condition of switching DRBD to primary
6894 # before handshaking occurred, but we do not eliminate it
6896 # The proper fix would be to wait (with some limits) until the
6897 # connection has been made and drbd transitions from WFConnection
6898 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6901 # 1st pass, assemble on all nodes in secondary mode
6902 for idx, inst_disk in enumerate(disks):
6903 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6905 node_disk = node_disk.Copy()
6906 node_disk.UnsetSize()
6907 lu.cfg.SetDiskID(node_disk, node)
6908 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname, False, idx)
6910 msg = result.fail_msg
6911 if msg:
6912 is_offline_secondary = (node in instance.secondary_nodes and
6913 result.offline)
6914 lu.LogWarning("Could not prepare block device %s on node %s"
6915 " (is_primary=False, pass=1): %s",
6916 inst_disk.iv_name, node, msg)
6917 if not (ignore_secondaries or is_offline_secondary):
6920 # FIXME: race condition on drbd migration to primary
6922 # 2nd pass, do only the primary node
6923 for idx, inst_disk in enumerate(disks):
6926 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6927 if node != instance.primary_node:
6930 node_disk = node_disk.Copy()
6931 node_disk.UnsetSize()
6932 lu.cfg.SetDiskID(node_disk, node)
6933 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname, True, idx)
6935 msg = result.fail_msg
6937 lu.LogWarning("Could not prepare block device %s on node %s"
6938 " (is_primary=True, pass=2): %s",
6939 inst_disk.iv_name, node, msg)
6942 dev_path = result.payload
6944 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6946 # leave the disks configured for the primary node
6947 # this is a workaround that would be fixed better by
6948 # improving the logical/physical id handling
6950 lu.cfg.SetDiskID(disk, instance.primary_node)
6952 return disks_ok, device_info
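# Illustrative usage sketch for _AssembleInstanceDisks (log text made up);
# this mirrors what LUInstanceActivateDisks.Exec does above:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("%s on %s is visible as %s", iv_name, node, dev_path)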
6955 def _StartInstanceDisks(lu, instance, force):
6956 """Start the disks of an instance.
6959 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6960 ignore_secondaries=force)
6961 if not disks_ok:
6962 _ShutdownInstanceDisks(lu, instance)
6963 if force is not None and not force:
6965 hint=("If the message above refers to a secondary node,"
6966 " you can retry the operation using '--force'"))
6967 raise errors.OpExecError("Disk consistency error")
6970 class LUInstanceDeactivateDisks(NoHooksLU):
6971 """Shutdown an instance's disks.
6976 def ExpandNames(self):
6977 self._ExpandAndLockInstance()
6978 self.needed_locks[locking.LEVEL_NODE] = []
6979 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6981 def DeclareLocks(self, level):
6982 if level == locking.LEVEL_NODE:
6983 self._LockInstancesNodes()
6985 def CheckPrereq(self):
6986 """Check prerequisites.
6988 This checks that the instance is in the cluster.
6991 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6992 assert self.instance is not None, \
6993 "Cannot retrieve locked instance %s" % self.op.instance_name
6995 def Exec(self, feedback_fn):
6996 """Deactivate the disks
6999 instance = self.instance
7000 if self.op.force:
7001 _ShutdownInstanceDisks(self, instance)
7002 else:
7003 _SafeShutdownInstanceDisks(self, instance)
7006 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7007 """Shutdown block devices of an instance.
7009 This function verifies that the instance is not running before calling
7010 _ShutdownInstanceDisks.
7013 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7014 _ShutdownInstanceDisks(lu, instance, disks=disks)
7017 def _ExpandCheckDisks(instance, disks):
7018 """Return the instance disks selected by the disks list
7020 @type disks: list of L{objects.Disk} or None
7021 @param disks: selected disks
7022 @rtype: list of L{objects.Disk}
7023 @return: selected instance disks to act on
7027 return instance.disks
7029 if not set(disks).issubset(instance.disks):
7030 raise errors.ProgrammerError("Can only act on disks belonging to the"
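# Illustrative behaviour: _ExpandCheckDisks(instance, None) returns
# instance.disks unchanged, while passing a strict subset returns exactly
# that subset; any disk not belonging to the instance raises
# ProgrammerError.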
7035 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7036 """Shutdown block devices of an instance.
7038 This does the shutdown on all nodes of the instance.
7040 Errors on the primary node are ignored only if ignore_primary is true.
7045 disks = _ExpandCheckDisks(instance, disks)
7048 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7049 lu.cfg.SetDiskID(top_disk, node)
7050 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7051 msg = result.fail_msg
7053 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7054 disk.iv_name, node, msg)
7055 if ((node == instance.primary_node and not ignore_primary) or
7056 (node != instance.primary_node and not result.offline)):
7061 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7062 """Checks if a node has enough free memory.
7064 This function checks if a given node has the needed amount of free
7065 memory. In case the node has less memory or we cannot get the
7066 information from the node, this function raises an OpPrereqError exception.
7069 @type lu: C{LogicalUnit}
7070 @param lu: a logical unit from which we get configuration data
7072 @param node: the node to check
7073 @type reason: C{str}
7074 @param reason: string to use in the error message
7075 @type requested: C{int}
7076 @param requested: the amount of memory in MiB to check for
7077 @type hypervisor_name: C{str}
7078 @param hypervisor_name: the hypervisor to ask for memory stats
7080 @return: the node's current free memory, in MiB
7081 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7082 we cannot check the node
7085 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7086 nodeinfo[node].Raise("Can't get data from node %s" % node,
7087 prereq=True, ecode=errors.ECODE_ENVIRON)
7088 (_, _, (hv_info, )) = nodeinfo[node].payload
7090 free_mem = hv_info.get("memory_free", None)
7091 if not isinstance(free_mem, int):
7092 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7093 " was '%s'" % (node, free_mem),
7094 errors.ECODE_ENVIRON)
7095 if requested > free_mem:
7096 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7097 " needed %s MiB, available %s MiB" %
7098 (node, reason, requested, free_mem),
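# Illustrative call (made-up amount): require 1024 MiB free before a
# start/failover; the node's free memory is returned on success:
#
#   free_mem = _CheckNodeFreeMemory(self, instance.primary_node,
#                                   "starting instance %s" % instance.name,
#                                   1024, instance.hypervisor)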
7103 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7104 """Checks if nodes have enough free disk space in all the VGs.
7106 This function checks if all given nodes have the needed amount of
7107 free disk. In case any node has less disk or we cannot get the
7108 information from the node, this function raises an OpPrereqError exception.
7111 @type lu: C{LogicalUnit}
7112 @param lu: a logical unit from which we get configuration data
7113 @type nodenames: C{list}
7114 @param nodenames: the list of node names to check
7115 @type req_sizes: C{dict}
7116 @param req_sizes: the hash of vg and corresponding amount of disk in
7118 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7119 or we cannot check the node
7122 for vg, req_size in req_sizes.items():
7123 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
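# Example req_sizes mapping (made-up values): 10 GiB in "xenvg" plus 2 GiB
# in "metavg", checked on both candidate nodes:
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "metavg": 2048})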
7126 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7127 """Checks if nodes have enough free disk space in the specified VG.
7129 This function checks if all given nodes have the needed amount of
7130 free disk. In case any node has less disk or we cannot get the
7131 information from the node, this function raises an OpPrereqError exception.
7134 @type lu: C{LogicalUnit}
7135 @param lu: a logical unit from which we get configuration data
7136 @type nodenames: C{list}
7137 @param nodenames: the list of node names to check
7139 @param vg: the volume group to check
7140 @type requested: C{int}
7141 @param requested: the amount of disk in MiB to check for
7142 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7143 or we cannot check the node
7146 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7147 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7148 for node in nodenames:
7149 info = nodeinfo[node]
7150 info.Raise("Cannot get current information from node %s" % node,
7151 prereq=True, ecode=errors.ECODE_ENVIRON)
7152 (_, (vg_info, ), _) = info.payload
7153 vg_free = vg_info.get("vg_free", None)
7154 if not isinstance(vg_free, int):
7155 raise errors.OpPrereqError("Can't compute free disk space on node"
7156 " %s for vg %s, result was '%s'" %
7157 (node, vg, vg_free), errors.ECODE_ENVIRON)
7158 if requested > vg_free:
7159 raise errors.OpPrereqError("Not enough disk space on target node %s"
7160 " vg %s: required %d MiB, available %d MiB" %
7161 (node, vg, requested, vg_free),
7165 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7166 """Checks if nodes have enough physical CPUs
7168 This function checks if all given nodes have the needed number of
7169 physical CPUs. In case any node has less CPUs or we cannot get the
7170 information from the node, this function raises an OpPrereqError exception.
7173 @type lu: C{LogicalUnit}
7174 @param lu: a logical unit from which we get configuration data
7175 @type nodenames: C{list}
7176 @param nodenames: the list of node names to check
7177 @type requested: C{int}
7178 @param requested: the minimum acceptable number of physical CPUs
7179 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7180 or we cannot check the node
7183 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7184 for node in nodenames:
7185 info = nodeinfo[node]
7186 info.Raise("Cannot get current information from node %s" % node,
7187 prereq=True, ecode=errors.ECODE_ENVIRON)
7188 (_, _, (hv_info, )) = info.payload
7189 num_cpus = hv_info.get("cpu_total", None)
7190 if not isinstance(num_cpus, int):
7191 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7192 " on node %s, result was '%s'" %
7193 (node, num_cpus), errors.ECODE_ENVIRON)
7194 if requested > num_cpus:
7195 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7196 "required" % (node, num_cpus, requested),
7200 class LUInstanceStartup(LogicalUnit):
7201 """Starts an instance.
7204 HPATH = "instance-start"
7205 HTYPE = constants.HTYPE_INSTANCE
7208 def CheckArguments(self):
7210 if self.op.beparams:
7211 # fill the beparams dict
7212 objects.UpgradeBeParams(self.op.beparams)
7213 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7215 def ExpandNames(self):
7216 self._ExpandAndLockInstance()
7217 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7219 def DeclareLocks(self, level):
7220 if level == locking.LEVEL_NODE_RES:
7221 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7223 def BuildHooksEnv(self):
7226 This runs on master, primary and secondary nodes of the instance.
7230 "FORCE": self.op.force,
7233 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7237 def BuildHooksNodes(self):
7238 """Build hooks nodes.
7241 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7244 def CheckPrereq(self):
7245 """Check prerequisites.
7247 This checks that the instance is in the cluster.
7250 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7251 assert self.instance is not None, \
7252 "Cannot retrieve locked instance %s" % self.op.instance_name
7255 if self.op.hvparams:
7256 # check hypervisor parameter syntax (locally)
7257 cluster = self.cfg.GetClusterInfo()
7258 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7259 filled_hvp = cluster.FillHV(instance)
7260 filled_hvp.update(self.op.hvparams)
7261 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7262 hv_type.CheckParameterSyntax(filled_hvp)
7263 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7265 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7267 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7269 if self.primary_offline and self.op.ignore_offline_nodes:
7270 self.LogWarning("Ignoring offline primary node")
7272 if self.op.hvparams or self.op.beparams:
7273 self.LogWarning("Overridden parameters are ignored")
7275 _CheckNodeOnline(self, instance.primary_node)
7277 bep = self.cfg.GetClusterInfo().FillBE(instance)
7278 bep.update(self.op.beparams)
7280 # check bridges existence
7281 _CheckInstanceBridgesExist(self, instance)
7283 remote_info = self.rpc.call_instance_info(instance.primary_node,
7284 instance.name,
7285 instance.hypervisor)
7286 remote_info.Raise("Error checking node %s" % instance.primary_node,
7287 prereq=True, ecode=errors.ECODE_ENVIRON)
7288 if not remote_info.payload: # not running already
7289 _CheckNodeFreeMemory(self, instance.primary_node,
7290 "starting instance %s" % instance.name,
7291 bep[constants.BE_MINMEM], instance.hypervisor)
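# Illustrative parameter layering (made-up numbers): cluster-level beparams
# are filled in first, then per-opcode overrides win, e.g.
#
#   FillBE(instance)   -> {"minmem": 128, "maxmem": 128, ...}
#   self.op.beparams   -> {"maxmem": 256}
#   bep after update() -> {"minmem": 128, "maxmem": 256, ...}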
7293 def Exec(self, feedback_fn):
7294 """Start the instance.
7297 instance = self.instance
7298 force = self.op.force
7300 if not self.op.no_remember:
7301 self.cfg.MarkInstanceUp(instance.name)
7303 if self.primary_offline:
7304 assert self.op.ignore_offline_nodes
7305 self.LogInfo("Primary node offline, marked instance as started")
7307 node_current = instance.primary_node
7309 _StartInstanceDisks(self, instance, force)
7312 result = self.rpc.call_instance_start(node_current,
7313 (instance, self.op.hvparams,
7314 self.op.beparams),
7315 self.op.startup_paused)
7316 msg = result.fail_msg
7317 if msg:
7318 _ShutdownInstanceDisks(self, instance)
7319 raise errors.OpExecError("Could not start instance: %s" % msg)
7322 class LUInstanceReboot(LogicalUnit):
7323 """Reboot an instance.
7326 HPATH = "instance-reboot"
7327 HTYPE = constants.HTYPE_INSTANCE
7330 def ExpandNames(self):
7331 self._ExpandAndLockInstance()
7333 def BuildHooksEnv(self):
7336 This runs on master, primary and secondary nodes of the instance.
7340 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7341 "REBOOT_TYPE": self.op.reboot_type,
7342 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7345 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7349 def BuildHooksNodes(self):
7350 """Build hooks nodes.
7353 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7356 def CheckPrereq(self):
7357 """Check prerequisites.
7359 This checks that the instance is in the cluster.
7362 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7363 assert self.instance is not None, \
7364 "Cannot retrieve locked instance %s" % self.op.instance_name
7365 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7366 _CheckNodeOnline(self, instance.primary_node)
7368 # check bridges existence
7369 _CheckInstanceBridgesExist(self, instance)
7371 def Exec(self, feedback_fn):
7372 """Reboot the instance.
7375 instance = self.instance
7376 ignore_secondaries = self.op.ignore_secondaries
7377 reboot_type = self.op.reboot_type
7379 remote_info = self.rpc.call_instance_info(instance.primary_node,
7380 instance.name,
7381 instance.hypervisor)
7382 remote_info.Raise("Error checking node %s" % instance.primary_node)
7383 instance_running = bool(remote_info.payload)
7385 node_current = instance.primary_node
7387 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7388 constants.INSTANCE_REBOOT_HARD]:
7389 for disk in instance.disks:
7390 self.cfg.SetDiskID(disk, node_current)
7391 result = self.rpc.call_instance_reboot(node_current, instance,
7392 reboot_type,
7393 self.op.shutdown_timeout)
7394 result.Raise("Could not reboot instance")
7396 if instance_running:
7397 result = self.rpc.call_instance_shutdown(node_current, instance,
7398 self.op.shutdown_timeout)
7399 result.Raise("Could not shutdown instance for full reboot")
7400 _ShutdownInstanceDisks(self, instance)
7402 self.LogInfo("Instance %s was already stopped, starting now",
7403 instance.name)
7404 _StartInstanceDisks(self, instance, ignore_secondaries)
7405 result = self.rpc.call_instance_start(node_current,
7406 (instance, None, None), False)
7407 msg = result.fail_msg
7408 if msg:
7409 _ShutdownInstanceDisks(self, instance)
7410 raise errors.OpExecError("Could not start instance for"
7411 " full reboot: %s" % msg)
7413 self.cfg.MarkInstanceUp(instance.name)
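# Summary of the reboot types handled above: SOFT and HARD reboots are
# delegated to the hypervisor via call_instance_reboot; any other type
# (i.e. a full reboot) is emulated as shutdown + disk teardown + disk
# re-assembly + start.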
7416 class LUInstanceShutdown(LogicalUnit):
7417 """Shutdown an instance.
7420 HPATH = "instance-stop"
7421 HTYPE = constants.HTYPE_INSTANCE
7424 def ExpandNames(self):
7425 self._ExpandAndLockInstance()
7427 def BuildHooksEnv(self):
7430 This runs on master, primary and secondary nodes of the instance.
7433 env = _BuildInstanceHookEnvByObject(self, self.instance)
7434 env["TIMEOUT"] = self.op.timeout
7437 def BuildHooksNodes(self):
7438 """Build hooks nodes.
7441 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7444 def CheckPrereq(self):
7445 """Check prerequisites.
7447 This checks that the instance is in the cluster.
7450 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7451 assert self.instance is not None, \
7452 "Cannot retrieve locked instance %s" % self.op.instance_name
7454 if not self.op.force:
7455 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7457 self.LogWarning("Ignoring offline instance check")
7459 self.primary_offline = \
7460 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7462 if self.primary_offline and self.op.ignore_offline_nodes:
7463 self.LogWarning("Ignoring offline primary node")
7465 _CheckNodeOnline(self, self.instance.primary_node)
7467 def Exec(self, feedback_fn):
7468 """Shutdown the instance.
7471 instance = self.instance
7472 node_current = instance.primary_node
7473 timeout = self.op.timeout
7475 # If the instance is offline we shouldn't mark it as down, as that
7476 # resets the offline flag.
7477 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7478 self.cfg.MarkInstanceDown(instance.name)
7480 if self.primary_offline:
7481 assert self.op.ignore_offline_nodes
7482 self.LogInfo("Primary node offline, marked instance as stopped")
7484 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7485 msg = result.fail_msg
7487 self.LogWarning("Could not shutdown instance: %s", msg)
7489 _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = compat.UniqueFrozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    constants.IDISK_PROVIDER,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                               for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor,
                                        node_whitelist=None)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
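
  # Added note: for an IAReqInstanceAlloc request, ial.result is expected to
  # be a list of node names sized by req.RequiredNodes(), e.g. two names for
  # a DRBD8 instance; the example shape is illustrative, not from the source.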

  def CheckArguments(self):
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifiable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)

        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_PARAMS)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    _CreateDisks(self, instance, to_skip=to_skip)
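
# Added note: each entry appended to "mods" above has the shape
# (disk_index, new_logical_id_or_None, changes_dict); applying the entries
# only after all asserts pass keeps a failed recreate from leaving partial
# configuration updates behind.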


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_ARGS)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = _CheckHostnameSane(self, new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # update info on disks
    info = _GetInstanceInfoText(inst)
    for (idx, disk) in enumerate(inst.disks):
      for node in inst.all_nodes:
        self.cfg.SetDiskID(disk, node)
        result = self.rpc.call_blockdev_setinfo(node, disk, info)
        if result.fail_msg:
          self.LogWarning("Error setting info on node %s for disk %s: %s",
                          node, idx, result.fail_msg)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
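
# Added note: setting lu.remove_locks[locking.LEVEL_INSTANCE] above marks the
# instance lock for removal; the processor is assumed to drop it once the LU
# finishes, so no caller is left holding a lock for the deleted instance.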


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


def _ExpandNamesForMigration(lu):
  """Expands names for use with L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}

  """
  if lu.op.target_node is not None:
    lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)

  lu.needed_locks[locking.LEVEL_NODE] = []
  lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  lu.needed_locks[locking.LEVEL_NODE_RES] = []
  lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for replicated instances
  # (e.g. DRBD8) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []


def _DeclareLocksForMigration(lu, level):
  """Declares locks for L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_ALLOC:
    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)

    instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)

    # Node locks are already declared here rather than at LEVEL_NODE as we need
    # the instance object anyway to declare the node allocation lock.
    if instance.disk_template in constants.DTS_EXT_MIRROR:
      if lu.op.target_node is None:
        lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
      else:
        lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                               lu.op.target_node]
        del lu.recalculate_locks[locking.LEVEL_NODE]
    else:
      lu._LockInstancesNodes() # pylint: disable=W0212

  elif level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] or
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)

  elif level == locking.LEVEL_NODE_RES:
    # Copy node locks
    lu.needed_locks[locking.LEVEL_NODE_RES] = \
      _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
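
# Added note: DeclareLocks is invoked once per lock level in increasing level
# order, and LEVEL_NODE_ALLOC is processed before LEVEL_NODE here; that is why
# the node locks can already be computed while the allocation lock is being
# declared, with LEVEL_NODE itself reduced to an assertion.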


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, False, True, False,
                        self.op.ignore_consistency, True,
                        self.op.shutdown_timeout, self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
                        False, self.op.allow_failover, False,
                        self.op.allow_runtime_changes,
                        constants.DEFAULT_SHUTDOWN_TIMEOUT,
                        self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    snodes = list(instance.secondary_nodes)
    nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
    return (nl, nl)
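
# Added note: both LUInstanceFailover and LUInstanceMigrate delegate the real
# work to TLMigrateInstance (defined further below); the positional flags they
# pass map to (cleanup, failover, fallback, ignore_consistency,
# allow_runtime_changes) in its constructor.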


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    cluster = self.cfg.GetClusterInfo()
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.LogWarning("Could not shutdown instance %s on node %s."
                        " Proceeding anyway. Please make sure node"
                        " %s is down. Error details: %s",
                        instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 ",".join(errs))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
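
# Added note: "jobs" above is a list of job definitions, each itself a list of
# opcodes; here every primary instance gets its own single-opcode job, so the
# individual migrations can succeed or fail independently of each other.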


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
  @type ignore_ipolicy: bool
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1 # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup, failover, fallback,
               ignore_consistency, allow_runtime_changes, shutdown_timeout,
               ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])
        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the secondary node
    if (not self.cleanup and
        (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
                                               "migrating instance %s" %
                                               instance.name,
                                               i_be[constants.BE_MINMEM],
                                               instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

    # FIXME: add a self.ignore_ipolicy option
    req = iallocator.IAReqRelocate(name=self.instance_name,
                                   relocate_from=[self.instance.primary_node])
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            (self.instance.disks,
                                             self.instance))
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
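
  # Added note: nres.payload from call_drbd_wait_sync is unpacked above as a
  # (node_done, node_percent) pair per node; the polling loop only terminates
  # once every node reports node_done.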

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)
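
  # Added note: a minimal sketch of how the helpers above are combined by
  # _ExecMigration below for DRBD-based instances:
  #
  #   self._EnsureSecondary(target_node)  # demote the target's disks
  #   self._GoStandalone()                # disconnect the DRBD network
  #   self._GoReconnect(True)             # reconnect in dual-master mode
  #   self._WaitUntilSync()               # wait for full resync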

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(
      source_node, instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor], False)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])
    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' or `ext' and there was a
    # successful migration, unmap the device from the source node.
    if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
                                     False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()
9275 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9277 """Wrapper around L{_CreateBlockDevInner}.
9279 This method annotates the root device first.
9282 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9283 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9284 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9285 force_open, excl_stor)
9288 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9289 info, force_open, excl_stor):
9290 """Create a tree of block devices on a given node.
9292 If this device type has to be created on secondaries, create it and
9293 all its children.
9295 If not, just recurse to children keeping the same 'force' value.
9297 @attention: The device has to be annotated already.
9299 @param lu: the lu on whose behalf we execute
9300 @param node: the node on which to create the device
9301 @type instance: L{objects.Instance}
9302 @param instance: the instance which owns the device
9303 @type device: L{objects.Disk}
9304 @param device: the device to create
9305 @type force_create: boolean
9306 @param force_create: whether to force creation of this device; this
9307 will be changed to True whenever we find a device whose
9308 CreateOnSecondary() method returns True
9309 @param info: the extra 'metadata' we should attach to the device
9310 (this will be represented as a LVM tag)
9311 @type force_open: boolean
9312 @param force_open: this parameter will be passed to the
9313 L{backend.BlockdevCreate} function where it specifies
9314 whether we run on primary or not, and it affects both
9315 the child assembly and the device's own Open() execution
9316 @type excl_stor: boolean
9317 @param excl_stor: Whether exclusive_storage is active for the node
9320 if device.CreateOnSecondary():
9324 for child in device.children:
9325 _CreateBlockDevInner(lu, node, instance, child, force_create,
9326 info, force_open, excl_stor)
9328 if not force_create:
9331 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
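# A minimal sketch of how the recursion above plays out (device tree and
# flag values are hypothetical): for a DRBD8 disk, CreateOnSecondary()
# is true, so the two LV children (data and metadata) are visited with
# force_create=True and actually created; for a plain LV there are no
# children and creation happens only where the caller already passed
# force_create=True:
#
#   _CreateBlockDevInner(lu, node, instance, drbd_disk,
#                        force_create=False, info=info,
#                        force_open=False, excl_stor=False)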
9335 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9337 """Create a single block device on a given node.
9339 This will not recurse over children of the device, so they must be
9342 @param lu: the lu on whose behalf we execute
9343 @param node: the node on which to create the device
9344 @type instance: L{objects.Instance}
9345 @param instance: the instance which owns the device
9346 @type device: L{objects.Disk}
9347 @param device: the device to create
9348 @param info: the extra 'metadata' we should attach to the device
9349 (this will be represented as a LVM tag)
9350 @type force_open: boolean
9351 @param force_open: this parameter will be passed to the
9352 L{backend.BlockdevCreate} function where it specifies
9353 whether we run on primary or not, and it affects both
9354 the child assembly and the device's own Open() execution
9355 @type excl_stor: boolean
9356 @param excl_stor: Whether exclusive_storage is active for the node
9359 lu.cfg.SetDiskID(device, node)
9360 result = lu.rpc.call_blockdev_create(node, device, device.size,
9361 instance.name, force_open, info,
9363 result.Raise("Can't create block device %s on"
9364 " node %s for instance %s" % (device, node, instance.name))
9365 if device.physical_id is None:
9366 device.physical_id = result.payload
9369 def _GenerateUniqueNames(lu, exts):
9370 """Generate a suitable LV name.
9372 This will generate a logical volume name for the given instance.
9377 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9378 results.append("%s%s" % (new_id, val))
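# Illustrative example (the generated IDs are hypothetical; each
# extension gets its own fresh unique ID from the config):
#
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   -> ["c1f4a2d9-7b.disk0_data", "7e03dd41-a6.disk0_meta"]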
9382 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9383 iv_name, p_minor, s_minor):
9384 """Generate a drbd8 device complete with its children.
9387 assert len(vgnames) == len(names) == 2
9388 port = lu.cfg.AllocatePort()
9389 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9391 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9392 logical_id=(vgnames[0], names[0]),
9394 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9395 size=constants.DRBD_META_SIZE,
9396 logical_id=(vgnames[1], names[1]),
9398 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9399 logical_id=(primary, secondary, port,
9402 children=[dev_data, dev_meta],
9403 iv_name=iv_name, params={})
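# The device tree built above, sketched (sizes in MiB; port, minors and
# the shared secret come from the cluster config):
#
#   DRBD8  logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#    |- data LV  logical_id=(vgnames[0], names[0]), size=size
#    `- meta LV  logical_id=(vgnames[1], names[1]), size=DRBD_META_SIZE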
9407 _DISK_TEMPLATE_NAME_PREFIX = {
9408 constants.DT_PLAIN: "",
9409 constants.DT_RBD: ".rbd",
9410 constants.DT_EXT: ".ext",
9414 _DISK_TEMPLATE_DEVICE_TYPE = {
9415 constants.DT_PLAIN: constants.LD_LV,
9416 constants.DT_FILE: constants.LD_FILE,
9417 constants.DT_SHARED_FILE: constants.LD_FILE,
9418 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9419 constants.DT_RBD: constants.LD_RBD,
9420 constants.DT_EXT: constants.LD_EXT,
9424 def _GenerateDiskTemplate(
9425 lu, template_name, instance_name, primary_node, secondary_nodes,
9426 disk_info, file_storage_dir, file_driver, base_index,
9427 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9428 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9429 """Generate the entire disk layout for a given template type.
9432 vgname = lu.cfg.GetVGName()
9433 disk_count = len(disk_info)
9436 if template_name == constants.DT_DISKLESS:
9438 elif template_name == constants.DT_DRBD8:
9439 if len(secondary_nodes) != 1:
9440 raise errors.ProgrammerError("Wrong template configuration")
9441 remote_node = secondary_nodes[0]
9442 minors = lu.cfg.AllocateDRBDMinor(
9443 [primary_node, remote_node] * len(disk_info), instance_name)
9445 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9447 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9450 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9451 for i in range(disk_count)]):
9452 names.append(lv_prefix + "_data")
9453 names.append(lv_prefix + "_meta")
9454 for idx, disk in enumerate(disk_info):
9455 disk_index = idx + base_index
9456 data_vg = disk.get(constants.IDISK_VG, vgname)
9457 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9458 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9459 disk[constants.IDISK_SIZE],
9461 names[idx * 2:idx * 2 + 2],
9462 "disk/%d" % disk_index,
9463 minors[idx * 2], minors[idx * 2 + 1])
9464 disk_dev.mode = disk[constants.IDISK_MODE]
9465 disks.append(disk_dev)
9468 raise errors.ProgrammerError("Wrong template configuration")
9470 if template_name == constants.DT_FILE:
9472 elif template_name == constants.DT_SHARED_FILE:
9473 _req_shr_file_storage()
9475 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9476 if name_prefix is None:
9479 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9480 (name_prefix, base_index + i)
9481 for i in range(disk_count)])
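# Illustrative example (UUIDs hypothetical): for DT_RBD with
# base_index=0 and two disks this yields names such as
#   ["<uuid0>.rbd.disk0", "<uuid1>.rbd.disk1"]
# while DT_PLAIN uses the empty prefix, e.g. "<uuid0>.disk0".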
9483 if template_name == constants.DT_PLAIN:
9485 def logical_id_fn(idx, _, disk):
9486 vg = disk.get(constants.IDISK_VG, vgname)
9487 return (vg, names[idx])
9489 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9491 lambda _, disk_index, disk: (file_driver,
9492 "%s/disk%d" % (file_storage_dir,
9494 elif template_name == constants.DT_BLOCK:
9496 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9497 disk[constants.IDISK_ADOPT])
9498 elif template_name == constants.DT_RBD:
9499 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9500 elif template_name == constants.DT_EXT:
9501 def logical_id_fn(idx, _, disk):
9502 provider = disk.get(constants.IDISK_PROVIDER, None)
9503 if provider is None:
9504 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9505 " not found" % (constants.DT_EXT,
9506 constants.IDISK_PROVIDER))
9507 return (provider, names[idx])
9509 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9511 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9513 for idx, disk in enumerate(disk_info):
9515 # Only for the Ext template add disk_info to params
9516 if template_name == constants.DT_EXT:
9517 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9519 if key not in constants.IDISK_PARAMS:
9520 params[key] = disk[key]
9521 disk_index = idx + base_index
9522 size = disk[constants.IDISK_SIZE]
9523 feedback_fn("* disk %s, size %s" %
9524 (disk_index, utils.FormatUnit(size, "h")))
9525 disks.append(objects.Disk(dev_type=dev_type, size=size,
9526 logical_id=logical_id_fn(idx, disk_index, disk),
9527 iv_name="disk/%d" % disk_index,
9528 mode=disk[constants.IDISK_MODE],
9534 def _GetInstanceInfoText(instance):
9535 """Compute that text that should be added to the disk's metadata.
9538 return "originstname+%s" % instance.name
9541 def _CalcEta(time_taken, written, total_size):
9542 """Calculates the ETA based on size written and total size.
9544 @param time_taken: The time taken so far
9545 @param written: amount written so far
9546 @param total_size: The total size of data to be written
9547 @return: The remaining time in seconds
9550 avg_time = time_taken / float(written)
9551 return (total_size - written) * avg_time
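# Worked example (numbers are hypothetical): 512 of 2048 units written
# in 120 seconds gives an average of 120/512 s per unit, so the
# remaining 1536 units need (2048 - 512) * (120 / 512.0) = 360 seconds:
#
#   >>> _CalcEta(120, 512, 2048)
#   360.0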
9554 def _WipeDisks(lu, instance, disks=None):
9555 """Wipes instance disks.
9557 @type lu: L{LogicalUnit}
9558 @param lu: the logical unit on whose behalf we execute
9559 @type instance: L{objects.Instance}
9560 @param instance: the instance whose disks we should wipe
9561 @return: the success of the wipe
9564 node = instance.primary_node
9567 disks = [(idx, disk, 0)
9568 for (idx, disk) in enumerate(instance.disks)]
9570 for (_, device, _) in disks:
9571 lu.cfg.SetDiskID(device, node)
9573 logging.info("Pausing synchronization of disks of instance '%s'",
9575 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9576 (map(compat.snd, disks),
9579 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9581 for idx, success in enumerate(result.payload):
9583 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9584 " failed", idx, instance.name)
9587 for (idx, device, offset) in disks:
9588 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9589 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9591 int(min(constants.MAX_WIPE_CHUNK,
9592 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
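# Worked example (assuming the usual constants.MAX_WIPE_CHUNK of 1024
# MiB and constants.MIN_WIPE_CHUNK_PERCENT of 10): a 100 GiB disk gets
# min(1024, 102400 / 100.0 * 10) = 1024 MiB chunks, while a 5 GiB disk
# gets min(1024, 5120 / 100.0 * 10) = 512 MiB chunks.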
9596 start_time = time.time()
9601 info_text = (" (from %s to %s)" %
9602 (utils.FormatUnit(offset, "h"),
9603 utils.FormatUnit(size, "h")))
9605 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9607 logging.info("Wiping disk %d for instance %s on node %s using"
9608 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9610 while offset < size:
9611 wipe_size = min(wipe_chunk_size, size - offset)
9613 logging.debug("Wiping disk %d, offset %s, chunk %s",
9614 idx, offset, wipe_size)
9616 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9618 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9619 (idx, offset, wipe_size))
9623 if now - last_output >= 60:
9624 eta = _CalcEta(now - start_time, offset, size)
9625 lu.LogInfo(" - done: %.1f%% ETA: %s",
9626 offset / float(size) * 100, utils.FormatSeconds(eta))
9629 logging.info("Resuming synchronization of disks for instance '%s'",
9632 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9633 (map(compat.snd, disks),
9638 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9639 node, result.fail_msg)
9641 for idx, success in enumerate(result.payload):
9643 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9644 " failed", idx, instance.name)
9647 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9648 """Create all disks for an instance.
9650 This abstracts away some work from AddInstance.
9652 @type lu: L{LogicalUnit}
9653 @param lu: the logical unit on whose behalf we execute
9654 @type instance: L{objects.Instance}
9655 @param instance: the instance whose disks we should create
9657 @param to_skip: list of indices to skip
9658 @type target_node: string
9659 @param target_node: if passed, overrides the target node for creation
9661 @return: the success of the creation
9664 info = _GetInstanceInfoText(instance)
9665 if target_node is None:
9666 pnode = instance.primary_node
9667 all_nodes = instance.all_nodes
9672 if instance.disk_template in constants.DTS_FILEBASED:
9673 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9674 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9676 result.Raise("Failed to create directory '%s' on"
9677 " node %s" % (file_storage_dir, pnode))
9679 # Note: this needs to be kept in sync with adding of disks in
9680 # LUInstanceSetParams
9681 for idx, device in enumerate(instance.disks):
9682 if to_skip and idx in to_skip:
9684 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9686 for node in all_nodes:
9687 f_create = node == pnode
9688 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9691 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9692 """Remove all disks for an instance.
9694 This abstracts away some work from `AddInstance()` and
9695 `RemoveInstance()`. Note that in case some of the devices couldn't
9696 be removed, the removal will continue with the other ones (compare
9697 with `_CreateDisks()`).
9699 @type lu: L{LogicalUnit}
9700 @param lu: the logical unit on whose behalf we execute
9701 @type instance: L{objects.Instance}
9702 @param instance: the instance whose disks we should remove
9703 @type target_node: string
9704 @param target_node: used to override the node on which to remove the disks
9706 @return: the success of the removal
9709 logging.info("Removing block devices for instance %s", instance.name)
9712 ports_to_release = set()
9713 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9714 for (idx, device) in enumerate(anno_disks):
9716 edata = [(target_node, device)]
9718 edata = device.ComputeNodeTree(instance.primary_node)
9719 for node, disk in edata:
9720 lu.cfg.SetDiskID(disk, node)
9721 result = lu.rpc.call_blockdev_remove(node, disk)
9723 lu.LogWarning("Could not remove disk %s on node %s,"
9724 " continuing anyway: %s", idx, node, result.fail_msg)
9725 if not (result.offline and node != instance.primary_node):
9728 # if this is a DRBD disk, return its port to the pool
9729 if device.dev_type in constants.LDS_DRBD:
9730 ports_to_release.add(device.logical_id[2])
9732 if all_result or ignore_failures:
9733 for port in ports_to_release:
9734 lu.cfg.AddTcpUdpPort(port)
9736 if instance.disk_template in constants.DTS_FILEBASED:
9737 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9741 tgt = instance.primary_node
9742 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9744 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9745 file_storage_dir, instance.primary_node, result.fail_msg)
9751 def _ComputeDiskSizePerVG(disk_template, disks):
9752 """Compute disk size requirements in the volume group
9755 def _compute(disks, payload):
9756 """Universal algorithm.
9761 vgs[disk[constants.IDISK_VG]] = \
9762 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9766 # Required free disk space as a function of disk and swap space
9768 constants.DT_DISKLESS: {},
9769 constants.DT_PLAIN: _compute(disks, 0),
9770 # 128 MB are added for drbd metadata for each disk
9771 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9772 constants.DT_FILE: {},
9773 constants.DT_SHARED_FILE: {},
9776 if disk_template not in req_size_dict:
9777 raise errors.ProgrammerError("Disk template '%s' size requirement"
9778 " is unknown" % disk_template)
9780 return req_size_dict[disk_template]
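# Illustrative example (sizes in MiB are hypothetical): two DRBD8 disks
# of 1024 and 2048 in volume group "xenvg" yield
#   _compute(disks, constants.DRBD_META_SIZE)
#   -> {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}
# given the 128 MiB of DRBD metadata added per disk.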
9783 def _FilterVmNodes(lu, nodenames):
9784 """Filters out non-vm_capable nodes from a list.
9786 @type lu: L{LogicalUnit}
9787 @param lu: the logical unit for which we check
9788 @type nodenames: list
9789 @param nodenames: the list of nodes on which we should check
9791 @return: the list of vm-capable nodes
9794 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9795 return [name for name in nodenames if name not in vm_nodes]
9798 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9799 """Hypervisor parameter validation.
9801 This function abstracts the hypervisor parameter validation to be
9802 used in both instance create and instance modify.
9804 @type lu: L{LogicalUnit}
9805 @param lu: the logical unit for which we check
9806 @type nodenames: list
9807 @param nodenames: the list of nodes on which we should check
9808 @type hvname: string
9809 @param hvname: the name of the hypervisor we should use
9810 @type hvparams: dict
9811 @param hvparams: the parameters which we need to check
9812 @raise errors.OpPrereqError: if the parameters are not valid
9815 nodenames = _FilterVmNodes(lu, nodenames)
9817 cluster = lu.cfg.GetClusterInfo()
9818 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9820 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9821 for node in nodenames:
9822 info = hvinfo[node]
9825 info.Raise("Hypervisor parameter validation failed on node %s" % node)
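# A small sketch of the merge performed above (keys and values are
# hypothetical): instance-level hvparams override the cluster-level
# defaults for the selected hypervisor before validation:
#
#   objects.FillDict({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                    {"acpi": False})
#   -> {"kernel_path": "/boot/vmlinuz", "acpi": False}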
9828 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9829 """OS parameters validation.
9831 @type lu: L{LogicalUnit}
9832 @param lu: the logical unit for which we check
9833 @type required: boolean
9834 @param required: whether the validation should fail if the OS is not
9835 found
9836 @type nodenames: list
9837 @param nodenames: the list of nodes on which we should check
9838 @type osname: string
9839 @param osname: the name of the OS we should use
9840 @type osparams: dict
9841 @param osparams: the parameters which we need to check
9842 @raise errors.OpPrereqError: if the parameters are not valid
9845 nodenames = _FilterVmNodes(lu, nodenames)
9846 result = lu.rpc.call_os_validate(nodenames, required, osname,
9847 [constants.OS_VALIDATE_PARAMETERS],
9849 for node, nres in result.items():
9850 # we don't check for offline cases since this should be run only
9851 # against the master node and/or an instance's nodes
9852 nres.Raise("OS Parameters validation failed on node %s" % node)
9853 if not nres.payload:
9854 lu.LogInfo("OS %s not found on node %s, validation skipped",
9858 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9859 """Wrapper around IAReqInstanceAlloc.
9861 @param op: The instance opcode
9862 @param disks: The computed disks
9863 @param nics: The computed nics
9864 @param beparams: The fully filled beparams
9865 @param node_whitelist: List of nodes which should appear as online to the
9866 allocator (unless the node is already marked offline)
9868 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9871 spindle_use = beparams[constants.BE_SPINDLE_USE]
9872 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9873 disk_template=op.disk_template,
9876 vcpus=beparams[constants.BE_VCPUS],
9877 memory=beparams[constants.BE_MAXMEM],
9878 spindle_use=spindle_use,
9880 nics=[n.ToDict() for n in nics],
9881 hypervisor=op.hypervisor,
9882 node_whitelist=node_whitelist)
9885 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9886 """Computes the nics.
9888 @param op: The instance opcode
9889 @param cluster: Cluster configuration object
9890 @param default_ip: The default ip to assign
9891 @param cfg: An instance of the configuration object
9892 @param ec_id: Execution context ID
9894 @returns: The built-up nics
9899 nic_mode_req = nic.get(constants.INIC_MODE, None)
9900 nic_mode = nic_mode_req
9901 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9902 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9904 net = nic.get(constants.INIC_NETWORK, None)
9905 link = nic.get(constants.NIC_LINK, None)
9906 ip = nic.get(constants.INIC_IP, None)
9908 if net is None or net.lower() == constants.VALUE_NONE:
9911 if nic_mode_req is not None or link is not None:
9912 raise errors.OpPrereqError("If network is given, no mode or link"
9913 " is allowed to be passed",
9916 # ip validity checks
9917 if ip is None or ip.lower() == constants.VALUE_NONE:
9919 elif ip.lower() == constants.VALUE_AUTO:
9920 if not op.name_check:
9921 raise errors.OpPrereqError("IP address set to auto but name checks"
9922 " have been skipped",
9926 # We defer pool operations until later, so that the iallocator has
9927 # filled in the instance's node(s)
9928 if ip.lower() == constants.NIC_IP_POOL:
9930 raise errors.OpPrereqError("if ip=pool, parameter network"
9931 " must be passed too",
9934 elif not netutils.IPAddress.IsValid(ip):
9935 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9940 # TODO: check the ip address for uniqueness
9941 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9942 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9945 # MAC address verification
9946 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9947 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9948 mac = utils.NormalizeAndValidateMac(mac)
9951 # TODO: We need to factor this out
9952 cfg.ReserveMAC(mac, ec_id)
9953 except errors.ReservationError:
9954 raise errors.OpPrereqError("MAC address %s already in use"
9955 " in cluster" % mac,
9956 errors.ECODE_NOTUNIQUE)
9958 # Build nic parameters
9961 nicparams[constants.NIC_MODE] = nic_mode
9963 nicparams[constants.NIC_LINK] = link
9965 check_params = cluster.SimpleFillNIC(nicparams)
9966 objects.NIC.CheckParameterSyntax(check_params)
9967 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9968 network=net, nicparams=nicparams))
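# Illustrative example (values hypothetical): an input nic of
#   {constants.INIC_MODE: "auto", constants.INIC_IP: "auto"}
# takes the cluster default mode, resolves the IP to default_ip (which
# requires op.name_check), and keeps mac=constants.VALUE_AUTO to be
# replaced by a generated address later; combining a network with an
# explicit mode or link raises OpPrereqError, as checked above.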
9973 def _ComputeDisks(op, default_vg):
9974 """Computes the instance disks.
9976 @param op: The instance opcode
9977 @param default_vg: The default_vg to assume
9979 @return: The computed disks
9983 for disk in op.disks:
9984 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9985 if mode not in constants.DISK_ACCESS_SET:
9986 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9987 mode, errors.ECODE_INVAL)
9988 size = disk.get(constants.IDISK_SIZE, None)
9990 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9993 except (TypeError, ValueError):
9994 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9997 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9998 if ext_provider and op.disk_template != constants.DT_EXT:
9999 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10000 " disk template, not %s" %
10001 (constants.IDISK_PROVIDER, constants.DT_EXT,
10002 op.disk_template), errors.ECODE_INVAL)
10004 data_vg = disk.get(constants.IDISK_VG, default_vg)
10006 constants.IDISK_SIZE: size,
10007 constants.IDISK_MODE: mode,
10008 constants.IDISK_VG: data_vg,
10011 if constants.IDISK_METAVG in disk:
10012 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10013 if constants.IDISK_ADOPT in disk:
10014 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10016 # For extstorage, demand the `provider' option and add any
10017 # additional parameters (ext-params) to the dict
10018 if op.disk_template == constants.DT_EXT:
10020 new_disk[constants.IDISK_PROVIDER] = ext_provider
10022 if key not in constants.IDISK_PARAMS:
10023 new_disk[key] = disk[key]
10025 raise errors.OpPrereqError("Missing provider for template '%s'" %
10026 constants.DT_EXT, errors.ECODE_INVAL)
10028 disks.append(new_disk)
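# Illustrative example (sizes hypothetical): with default_vg="xenvg",
#   op.disks = [{constants.IDISK_SIZE: 10240}]
# computes to
#   [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#     constants.IDISK_VG: "xenvg"}]
# since the access mode defaults to constants.DISK_RDWR.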
10033 def _ComputeFullBeParams(op, cluster):
10034 """Computes the full beparams.
10036 @param op: The instance opcode
10037 @param cluster: The cluster config object
10039 @return: The fully filled beparams
10042 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10043 for param, value in op.beparams.iteritems():
10044 if value == constants.VALUE_AUTO:
10045 op.beparams[param] = default_beparams[param]
10046 objects.UpgradeBeParams(op.beparams)
10047 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10048 return cluster.SimpleFillBE(op.beparams)
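# Illustrative example (values hypothetical): with a cluster default of
# maxmem=512, an opcode carrying
#   {constants.BE_MAXMEM: constants.VALUE_AUTO, constants.BE_VCPUS: 4}
# first has the "auto" replaced by 512, and is then completed with the
# remaining cluster defaults via SimpleFillBE.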
10051 def _CheckOpportunisticLocking(op):
10052 """Generate error if opportunistic locking is not possible.
10055 if op.opportunistic_locking and not op.iallocator:
10056 raise errors.OpPrereqError("Opportunistic locking is only available in"
10057 " combination with an instance allocator",
10058 errors.ECODE_INVAL)
10061 class LUInstanceCreate(LogicalUnit):
10062 """Create an instance.
10065 HPATH = "instance-add"
10066 HTYPE = constants.HTYPE_INSTANCE
10069 def CheckArguments(self):
10070 """Check arguments.
10073 # do not require name_check to ease forward/backward compatibility
10075 if self.op.no_install and self.op.start:
10076 self.LogInfo("No-installation mode selected, disabling startup")
10077 self.op.start = False
10078 # validate/normalize the instance name
10079 self.op.instance_name = \
10080 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10082 if self.op.ip_check and not self.op.name_check:
10083 # TODO: make the ip check more flexible and not depend on the name check
10084 raise errors.OpPrereqError("Cannot do IP address check without a name"
10085 " check", errors.ECODE_INVAL)
10087 # check nics' parameter names
10088 for nic in self.op.nics:
10089 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10091 # check disks. parameter names and consistent adopt/no-adopt strategy
10092 has_adopt = has_no_adopt = False
10093 for disk in self.op.disks:
10094 if self.op.disk_template != constants.DT_EXT:
10095 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10096 if constants.IDISK_ADOPT in disk:
10099 has_no_adopt = True
10100 if has_adopt and has_no_adopt:
10101 raise errors.OpPrereqError("Either all disks are adopted or none is",
10102 errors.ECODE_INVAL)
10104 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10105 raise errors.OpPrereqError("Disk adoption is not supported for the"
10106 " '%s' disk template" %
10107 self.op.disk_template,
10108 errors.ECODE_INVAL)
10109 if self.op.iallocator is not None:
10110 raise errors.OpPrereqError("Disk adoption not allowed with an"
10111 " iallocator script", errors.ECODE_INVAL)
10112 if self.op.mode == constants.INSTANCE_IMPORT:
10113 raise errors.OpPrereqError("Disk adoption not allowed for"
10114 " instance import", errors.ECODE_INVAL)
10116 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10117 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10118 " but no 'adopt' parameter given" %
10119 self.op.disk_template,
10120 errors.ECODE_INVAL)
10122 self.adopt_disks = has_adopt
10124 # instance name verification
10125 if self.op.name_check:
10126 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10127 self.op.instance_name = self.hostname1.name
10128 # used in CheckPrereq for ip ping check
10129 self.check_ip = self.hostname1.ip
10131 self.check_ip = None
10133 # file storage checks
10134 if (self.op.file_driver and
10135 self.op.file_driver not in constants.FILE_DRIVER):
10136 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10137 self.op.file_driver, errors.ECODE_INVAL)
10139 if self.op.disk_template == constants.DT_FILE:
10140 opcodes.RequireFileStorage()
10141 elif self.op.disk_template == constants.DT_SHARED_FILE:
10142 opcodes.RequireSharedFileStorage()
10144 ### Node/iallocator related checks
10145 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10147 if self.op.pnode is not None:
10148 if self.op.disk_template in constants.DTS_INT_MIRROR:
10149 if self.op.snode is None:
10150 raise errors.OpPrereqError("The networked disk templates need"
10151 " a mirror node", errors.ECODE_INVAL)
10152 elif self.op.snode:
10153 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10155 self.op.snode = None
10157 _CheckOpportunisticLocking(self.op)
10159 self._cds = _GetClusterDomainSecret()
10161 if self.op.mode == constants.INSTANCE_IMPORT:
10162 # On import force_variant must be True, because if we forced it at
10163 # initial install, our only chance when importing it back is that it
10164 # works again
10165 self.op.force_variant = True
10167 if self.op.no_install:
10168 self.LogInfo("No-installation mode has no effect during import")
10170 elif self.op.mode == constants.INSTANCE_CREATE:
10171 if self.op.os_type is None:
10172 raise errors.OpPrereqError("No guest OS specified",
10173 errors.ECODE_INVAL)
10174 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10175 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10176 " installation" % self.op.os_type,
10177 errors.ECODE_STATE)
10178 if self.op.disk_template is None:
10179 raise errors.OpPrereqError("No disk template specified",
10180 errors.ECODE_INVAL)
10182 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10183 # Check handshake to ensure both clusters have the same domain secret
10184 src_handshake = self.op.source_handshake
10185 if not src_handshake:
10186 raise errors.OpPrereqError("Missing source handshake",
10187 errors.ECODE_INVAL)
10189 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10192 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10193 errors.ECODE_INVAL)
10195 # Load and check source CA
10196 self.source_x509_ca_pem = self.op.source_x509_ca
10197 if not self.source_x509_ca_pem:
10198 raise errors.OpPrereqError("Missing source X509 CA",
10199 errors.ECODE_INVAL)
10202 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10204 except OpenSSL.crypto.Error, err:
10205 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10206 (err, ), errors.ECODE_INVAL)
10208 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10209 if errcode is not None:
10210 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10211 errors.ECODE_INVAL)
10213 self.source_x509_ca = cert
10215 src_instance_name = self.op.source_instance_name
10216 if not src_instance_name:
10217 raise errors.OpPrereqError("Missing source instance name",
10218 errors.ECODE_INVAL)
10220 self.source_instance_name = \
10221 netutils.GetHostname(name=src_instance_name).name
10224 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10225 self.op.mode, errors.ECODE_INVAL)
10227 def ExpandNames(self):
10228 """ExpandNames for CreateInstance.
10230 Figure out the right locks for instance creation.
10233 self.needed_locks = {}
10235 instance_name = self.op.instance_name
10236 # this is just a preventive check, but someone might still add this
10237 # instance in the meantime, and creation will fail at lock-add time
10238 if instance_name in self.cfg.GetInstanceList():
10239 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10240 instance_name, errors.ECODE_EXISTS)
10242 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10244 if self.op.iallocator:
10245 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10246 # specifying a group on instance creation and then selecting nodes from
10248 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10249 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10251 if self.op.opportunistic_locking:
10252 self.opportunistic_locks[locking.LEVEL_NODE] = True
10253 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10255 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10256 nodelist = [self.op.pnode]
10257 if self.op.snode is not None:
10258 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10259 nodelist.append(self.op.snode)
10260 self.needed_locks[locking.LEVEL_NODE] = nodelist
10262 # in case of import lock the source node too
10263 if self.op.mode == constants.INSTANCE_IMPORT:
10264 src_node = self.op.src_node
10265 src_path = self.op.src_path
10267 if src_path is None:
10268 self.op.src_path = src_path = self.op.instance_name
10270 if src_node is None:
10271 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10272 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10273 self.op.src_node = None
10274 if os.path.isabs(src_path):
10275 raise errors.OpPrereqError("Importing an instance from a path"
10276 " requires a source node option",
10277 errors.ECODE_INVAL)
10279 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10280 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10281 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10282 if not os.path.isabs(src_path):
10283 self.op.src_path = src_path = \
10284 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10286 self.needed_locks[locking.LEVEL_NODE_RES] = \
10287 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10289 def _RunAllocator(self):
10290 """Run the allocator based on input opcode.
10293 if self.op.opportunistic_locking:
10294 # Only consider nodes for which a lock is held
10295 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10297 node_whitelist = None
10299 #TODO Export network to iallocator so that it chooses a pnode
10300 # in a nodegroup that has the desired network connected to
10301 req = _CreateInstanceAllocRequest(self.op, self.disks,
10302 self.nics, self.be_full,
10304 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10306 ial.Run(self.op.iallocator)
10308 if not ial.success:
10309 # When opportunistic locks are used only a temporary failure is generated
10310 if self.op.opportunistic_locking:
10311 ecode = errors.ECODE_TEMP_NORES
10313 ecode = errors.ECODE_NORES
10315 raise errors.OpPrereqError("Can't compute nodes using"
10316 " iallocator '%s': %s" %
10317 (self.op.iallocator, ial.info),
10320 self.op.pnode = ial.result[0]
10321 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10322 self.op.instance_name, self.op.iallocator,
10323 utils.CommaJoin(ial.result))
10325 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10327 if req.RequiredNodes() == 2:
10328 self.op.snode = ial.result[1]
10330 def BuildHooksEnv(self):
10331 """Build hooks env.
10333 This runs on master, primary and secondary nodes of the instance.
10337 "ADD_MODE": self.op.mode,
10339 if self.op.mode == constants.INSTANCE_IMPORT:
10340 env["SRC_NODE"] = self.op.src_node
10341 env["SRC_PATH"] = self.op.src_path
10342 env["SRC_IMAGES"] = self.src_images
10344 env.update(_BuildInstanceHookEnv(
10345 name=self.op.instance_name,
10346 primary_node=self.op.pnode,
10347 secondary_nodes=self.secondaries,
10348 status=self.op.start,
10349 os_type=self.op.os_type,
10350 minmem=self.be_full[constants.BE_MINMEM],
10351 maxmem=self.be_full[constants.BE_MAXMEM],
10352 vcpus=self.be_full[constants.BE_VCPUS],
10353 nics=_NICListToTuple(self, self.nics),
10354 disk_template=self.op.disk_template,
10355 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10356 for d in self.disks],
10359 hypervisor_name=self.op.hypervisor,
10365 def BuildHooksNodes(self):
10366 """Build hooks nodes.
10369 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10372 def _ReadExportInfo(self):
10373 """Reads the export information from disk.
10375 It will override the opcode source node and path with the actual
10376 information, if these two were not specified before.
10378 @return: the export information
10381 assert self.op.mode == constants.INSTANCE_IMPORT
10383 src_node = self.op.src_node
10384 src_path = self.op.src_path
10386 if src_node is None:
10387 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10388 exp_list = self.rpc.call_export_list(locked_nodes)
10390 for node in exp_list:
10391 if exp_list[node].fail_msg:
10393 if src_path in exp_list[node].payload:
10395 self.op.src_node = src_node = node
10396 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10400 raise errors.OpPrereqError("No export found for relative path %s" %
10401 src_path, errors.ECODE_INVAL)
10403 _CheckNodeOnline(self, src_node)
10404 result = self.rpc.call_export_info(src_node, src_path)
10405 result.Raise("No export or invalid export found in dir %s" % src_path)
10407 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10408 if not export_info.has_section(constants.INISECT_EXP):
10409 raise errors.ProgrammerError("Corrupted export config",
10410 errors.ECODE_ENVIRON)
10412 ei_version = export_info.get(constants.INISECT_EXP, "version")
10413 if int(ei_version) != constants.EXPORT_VERSION:
10414 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10415 (ei_version, constants.EXPORT_VERSION),
10416 errors.ECODE_ENVIRON)
10419 def _ReadExportParams(self, einfo):
10420 """Use export parameters as defaults.
10422 If the opcode doesn't specify (i.e. override) some instance
10423 parameters, try to take them from the export information, if
10424 that declares them.
10427 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10429 if self.op.disk_template is None:
10430 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10431 self.op.disk_template = einfo.get(constants.INISECT_INS,
10433 if self.op.disk_template not in constants.DISK_TEMPLATES:
10434 raise errors.OpPrereqError("Disk template specified in configuration"
10435 " file is not one of the allowed values:"
10437 " ".join(constants.DISK_TEMPLATES),
10438 errors.ECODE_INVAL)
10440 raise errors.OpPrereqError("No disk template specified and the export"
10441 " is missing the disk_template information",
10442 errors.ECODE_INVAL)
10444 if not self.op.disks:
10446 # TODO: import the disk iv_name too
10447 for idx in range(constants.MAX_DISKS):
10448 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10449 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10450 disks.append({constants.IDISK_SIZE: disk_sz})
10451 self.op.disks = disks
10452 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10453 raise errors.OpPrereqError("No disk info specified and the export"
10454 " is missing the disk information",
10455 errors.ECODE_INVAL)
10457 if not self.op.nics:
10459 for idx in range(constants.MAX_NICS):
10460 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10462 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10463 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10468 self.op.nics = nics
10470 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10471 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10473 if (self.op.hypervisor is None and
10474 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10475 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10477 if einfo.has_section(constants.INISECT_HYP):
10478 # use the export parameters but do not override the ones
10479 # specified by the user
10480 for name, value in einfo.items(constants.INISECT_HYP):
10481 if name not in self.op.hvparams:
10482 self.op.hvparams[name] = value
10484 if einfo.has_section(constants.INISECT_BEP):
10485 # use the parameters, without overriding
10486 for name, value in einfo.items(constants.INISECT_BEP):
10487 if name not in self.op.beparams:
10488 self.op.beparams[name] = value
10489 # Compatibility for the old "memory" be param
10490 if name == constants.BE_MEMORY:
10491 if constants.BE_MAXMEM not in self.op.beparams:
10492 self.op.beparams[constants.BE_MAXMEM] = value
10493 if constants.BE_MINMEM not in self.op.beparams:
10494 self.op.beparams[constants.BE_MINMEM] = value
10496 # try to read the parameters old style, from the main section
10497 for name in constants.BES_PARAMETERS:
10498 if (name not in self.op.beparams and
10499 einfo.has_option(constants.INISECT_INS, name)):
10500 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10502 if einfo.has_section(constants.INISECT_OSP):
10503 # use the parameters, without overriding
10504 for name, value in einfo.items(constants.INISECT_OSP):
10505 if name not in self.op.osparams:
10506 self.op.osparams[name] = value
10508 def _RevertToDefaults(self, cluster):
10509 """Revert the instance parameters to the default values.
10513 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10514 for name in self.op.hvparams.keys():
10515 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10516 del self.op.hvparams[name]
10518 be_defs = cluster.SimpleFillBE({})
10519 for name in self.op.beparams.keys():
10520 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10521 del self.op.beparams[name]
10523 nic_defs = cluster.SimpleFillNIC({})
10524 for nic in self.op.nics:
10525 for name in constants.NICS_PARAMETERS:
10526 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10529 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10530 for name in self.op.osparams.keys():
10531 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10532 del self.op.osparams[name]
10534 def _CalculateFileStorageDir(self):
10535 """Calculate final instance file storage dir.
10538 # file storage dir calculation/check
10539 self.instance_file_storage_dir = None
10540 if self.op.disk_template in constants.DTS_FILEBASED:
10541 # build the full file storage dir path
10544 if self.op.disk_template == constants.DT_SHARED_FILE:
10545 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10547 get_fsd_fn = self.cfg.GetFileStorageDir
10549 cfg_storagedir = get_fsd_fn()
10550 if not cfg_storagedir:
10551 raise errors.OpPrereqError("Cluster file storage dir not defined",
10552 errors.ECODE_STATE)
10553 joinargs.append(cfg_storagedir)
10555 if self.op.file_storage_dir is not None:
10556 joinargs.append(self.op.file_storage_dir)
10558 joinargs.append(self.op.instance_name)
10560 # pylint: disable=W0142
10561 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
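# Illustrative example (paths hypothetical): with a cluster file storage
# dir of "/srv/ganeti/file-storage", op.file_storage_dir="web" and an
# instance named "inst1.example.com", the resulting directory is
#   /srv/ganeti/file-storage/web/inst1.example.com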
10563 def CheckPrereq(self): # pylint: disable=R0914
10564 """Check prerequisites.
10567 self._CalculateFileStorageDir()
10569 if self.op.mode == constants.INSTANCE_IMPORT:
10570 export_info = self._ReadExportInfo()
10571 self._ReadExportParams(export_info)
10572 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10574 self._old_instance_name = None
10576 if (not self.cfg.GetVGName() and
10577 self.op.disk_template not in constants.DTS_NOT_LVM):
10578 raise errors.OpPrereqError("Cluster does not support lvm-based"
10579 " instances", errors.ECODE_STATE)
10581 if (self.op.hypervisor is None or
10582 self.op.hypervisor == constants.VALUE_AUTO):
10583 self.op.hypervisor = self.cfg.GetHypervisorType()
10585 cluster = self.cfg.GetClusterInfo()
10586 enabled_hvs = cluster.enabled_hypervisors
10587 if self.op.hypervisor not in enabled_hvs:
10588 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10590 (self.op.hypervisor, ",".join(enabled_hvs)),
10591 errors.ECODE_STATE)
10593 # Check tag validity
10594 for tag in self.op.tags:
10595 objects.TaggableObject.ValidateTag(tag)
10597 # check hypervisor parameter syntax (locally)
10598 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10599 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10601 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10602 hv_type.CheckParameterSyntax(filled_hvp)
10603 self.hv_full = filled_hvp
10604 # check that we don't specify global parameters on an instance
10605 _CheckGlobalHvParams(self.op.hvparams)
10607 # fill and remember the beparams dict
10608 self.be_full = _ComputeFullBeParams(self.op, cluster)
10610 # build os parameters
10611 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10613 # now that hvp/bep are in final format, let's reset to defaults,
10615 if self.op.identify_defaults:
10616 self._RevertToDefaults(cluster)
10619 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10620 self.proc.GetECId())
10622 # disk checks/pre-build
10623 default_vg = self.cfg.GetVGName()
10624 self.disks = _ComputeDisks(self.op, default_vg)
10626 if self.op.mode == constants.INSTANCE_IMPORT:
10628 for idx in range(len(self.disks)):
10629 option = "disk%d_dump" % idx
10630 if export_info.has_option(constants.INISECT_INS, option):
10631 # FIXME: are the old os-es, disk sizes, etc. useful?
10632 export_name = export_info.get(constants.INISECT_INS, option)
10633 image = utils.PathJoin(self.op.src_path, export_name)
10634 disk_images.append(image)
10636 disk_images.append(False)
10638 self.src_images = disk_images
10640 if self.op.instance_name == self._old_instance_name:
10641 for idx, nic in enumerate(self.nics):
10642 if nic.mac == constants.VALUE_AUTO:
10643 nic_mac_ini = "nic%d_mac" % idx
10644 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10646 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10648 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10649 if self.op.ip_check:
10650 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10651 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10652 (self.check_ip, self.op.instance_name),
10653 errors.ECODE_NOTUNIQUE)
10655 #### mac address generation
10656 # By generating here the mac address both the allocator and the hooks get
10657 # the real final mac address rather than the 'auto' or 'generate' value.
10658 # There is a race condition between the generation and the instance object
10659 # creation, which means that we know the mac is valid now, but we're not
10660 # sure it will be when we actually add the instance. If things go bad
10661 # adding the instance will abort because of a duplicate mac, and the
10662 # creation job will fail.
10663 for nic in self.nics:
10664 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10665 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10669 if self.op.iallocator is not None:
10670 self._RunAllocator()
10672 # Release all unneeded node locks
10673 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10674 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10675 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10676 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10678 assert (self.owned_locks(locking.LEVEL_NODE) ==
10679 self.owned_locks(locking.LEVEL_NODE_RES)), \
10680 "Node locks differ from node resource locks"
10682 #### node related checks
10684 # check primary node
10685 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10686 assert self.pnode is not None, \
10687 "Cannot retrieve locked node %s" % self.op.pnode
10689 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10690 pnode.name, errors.ECODE_STATE)
10692 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10693 pnode.name, errors.ECODE_STATE)
10694 if not pnode.vm_capable:
10695 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10696 " '%s'" % pnode.name, errors.ECODE_STATE)
10698 self.secondaries = []
10700 # Fill in any IPs from IP pools. This must happen here, because we need to
10701 # know the nic's primary node, as specified by the iallocator
10702 for idx, nic in enumerate(self.nics):
10704 if net is not None:
10705 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10706 if netparams is None:
10707 raise errors.OpPrereqError("No netparams found for network"
10708 " %s. Propably not connected to"
10709 " node's %s nodegroup" %
10710 (net, self.pnode.name),
10711 errors.ECODE_INVAL)
10712 self.LogInfo("NIC/%d inherits netparams %s" %
10713 (idx, netparams.values()))
10714 nic.nicparams = dict(netparams)
10715 if nic.ip is not None:
10716 if nic.ip.lower() == constants.NIC_IP_POOL:
10718 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10719 except errors.ReservationError:
10720 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10721 " from the address pool" % idx,
10722 errors.ECODE_STATE)
10723 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10726 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10727 except errors.ReservationError:
10728 raise errors.OpPrereqError("IP address %s already in use"
10729 " or does not belong to network %s" %
10731 errors.ECODE_NOTUNIQUE)
10733 # net is None, ip None or given
10734 elif self.op.conflicts_check:
10735 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10737 # mirror node verification
10738 if self.op.disk_template in constants.DTS_INT_MIRROR:
10739 if self.op.snode == pnode.name:
10740 raise errors.OpPrereqError("The secondary node cannot be the"
10741 " primary node", errors.ECODE_INVAL)
10742 _CheckNodeOnline(self, self.op.snode)
10743 _CheckNodeNotDrained(self, self.op.snode)
10744 _CheckNodeVmCapable(self, self.op.snode)
10745 self.secondaries.append(self.op.snode)
10747 snode = self.cfg.GetNodeInfo(self.op.snode)
10748 if pnode.group != snode.group:
10749 self.LogWarning("The primary and secondary nodes are in two"
10750 " different node groups; the disk parameters"
10751 " from the first disk's node group will be"
10754 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
10756 if self.op.disk_template in constants.DTS_INT_MIRROR:
10757 nodes.append(snode)
10758 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10759 if compat.any(map(has_es, nodes)):
10760 raise errors.OpPrereqError("Disk template %s not supported with"
10761 " exclusive storage" % self.op.disk_template,
10762 errors.ECODE_STATE)
10764 nodenames = [pnode.name] + self.secondaries
10766 # Verify instance specs
10767 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10769 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10770 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10771 constants.ISPEC_DISK_COUNT: len(self.disks),
10772 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10773 constants.ISPEC_NIC_COUNT: len(self.nics),
10774 constants.ISPEC_SPINDLE_USE: spindle_use,
10777 group_info = self.cfg.GetNodeGroup(pnode.group)
10778 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10779 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10780 if not self.op.ignore_ipolicy and res:
10781 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10782 (pnode.group, group_info.name, utils.CommaJoin(res)))
10783 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10785 if not self.adopt_disks:
10786 if self.op.disk_template == constants.DT_RBD:
10787 # _CheckRADOSFreeSpace() is just a placeholder.
10788 # Any function that checks prerequisites can be placed here.
10789 # Check if there is enough space on the RADOS cluster.
10790 _CheckRADOSFreeSpace()
10791 elif self.op.disk_template == constants.DT_EXT:
10792 # FIXME: Function that checks prereqs if needed
10795 # Check lv size requirements, if not adopting
10796 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10797 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10799 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10800 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10801 disk[constants.IDISK_ADOPT])
10802 for disk in self.disks])
10803 if len(all_lvs) != len(self.disks):
10804 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10805 errors.ECODE_INVAL)
10806 for lv_name in all_lvs:
10808 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10809 # to ReserveLV use the same syntax
10810 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10811 except errors.ReservationError:
10812 raise errors.OpPrereqError("LV named %s used by another instance" %
10813 lv_name, errors.ECODE_NOTUNIQUE)
10815 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10816 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10818 node_lvs = self.rpc.call_lv_list([pnode.name],
10819 vg_names.payload.keys())[pnode.name]
10820 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10821 node_lvs = node_lvs.payload
10823 delta = all_lvs.difference(node_lvs.keys())
10825 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10826 utils.CommaJoin(delta),
10827 errors.ECODE_INVAL)
10828 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10830 raise errors.OpPrereqError("Online logical volumes found, cannot"
10831 " adopt: %s" % utils.CommaJoin(online_lvs),
10832 errors.ECODE_STATE)
10833 # update the size of disk based on what is found
10834 for dsk in self.disks:
10835 dsk[constants.IDISK_SIZE] = \
10836 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10837 dsk[constants.IDISK_ADOPT])][0]))
10839 elif self.op.disk_template == constants.DT_BLOCK:
10840 # Normalize and de-duplicate device paths
10841 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10842 for disk in self.disks])
10843 if len(all_disks) != len(self.disks):
10844 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10845 errors.ECODE_INVAL)
10846 baddisks = [d for d in all_disks
10847 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10849 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10850 " cannot be adopted" %
10851 (utils.CommaJoin(baddisks),
10852 constants.ADOPTABLE_BLOCKDEV_ROOT),
10853 errors.ECODE_INVAL)
10855 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10856 list(all_disks))[pnode.name]
10857 node_disks.Raise("Cannot get block device information from node %s" %
10859 node_disks = node_disks.payload
10860 delta = all_disks.difference(node_disks.keys())
10862 raise errors.OpPrereqError("Missing block device(s): %s" %
10863 utils.CommaJoin(delta),
10864 errors.ECODE_INVAL)
10865 for dsk in self.disks:
10866 dsk[constants.IDISK_SIZE] = \
10867 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10869 # Verify instance specs
10870 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10872 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10873 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10874 constants.ISPEC_DISK_COUNT: len(self.disks),
10875 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10876 for disk in self.disks],
10877 constants.ISPEC_NIC_COUNT: len(self.nics),
10878 constants.ISPEC_SPINDLE_USE: spindle_use,
10881 group_info = self.cfg.GetNodeGroup(pnode.group)
10882 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10883 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10884 if not self.op.ignore_ipolicy and res:
10885 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10886 " policy: %s") % (pnode.group,
10887 utils.CommaJoin(res)),
10888 errors.ECODE_INVAL)
10890 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10892 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10893 # check OS parameters (remotely)
10894 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10896 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10898 #TODO: _CheckExtParams (remotely)
10899 # Check parameters for extstorage
10901 # memory check on primary node
10902 #TODO(dynmem): use MINMEM for checking
10904 _CheckNodeFreeMemory(self, self.pnode.name,
10905 "creating instance %s" % self.op.instance_name,
10906 self.be_full[constants.BE_MAXMEM],
10907 self.op.hypervisor)
10909 self.dry_run_result = list(nodenames)
10911 def Exec(self, feedback_fn):
10912 """Create and add the instance to the cluster.
10915 instance = self.op.instance_name
10916 pnode_name = self.pnode.name
10918 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10919 self.owned_locks(locking.LEVEL_NODE)), \
10920 "Node locks differ from node resource locks"
10921 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10923 ht_kind = self.op.hypervisor
10924 if ht_kind in constants.HTS_REQ_PORT:
10925 network_port = self.cfg.AllocatePort()
10927 network_port = None
10929 # This is ugly, but we have a chicken-and-egg problem here:
10930 # We can only take the group disk parameters, as the instance
10931 # has no disks yet (we are generating them right here).
10932 node = self.cfg.GetNodeInfo(pnode_name)
10933 nodegroup = self.cfg.GetNodeGroup(node.group)
10934 disks = _GenerateDiskTemplate(self,
10935 self.op.disk_template,
10936 instance, pnode_name,
10939 self.instance_file_storage_dir,
10940 self.op.file_driver,
10943 self.cfg.GetGroupDiskParams(nodegroup))
10945 iobj = objects.Instance(name=instance, os=self.op.os_type,
10946 primary_node=pnode_name,
10947 nics=self.nics, disks=disks,
10948 disk_template=self.op.disk_template,
10949 admin_state=constants.ADMINST_DOWN,
10950 network_port=network_port,
10951 beparams=self.op.beparams,
10952 hvparams=self.op.hvparams,
10953 hypervisor=self.op.hypervisor,
10954 osparams=self.op.osparams,
10958 for tag in self.op.tags:
10961 if self.adopt_disks:
10962 if self.op.disk_template == constants.DT_PLAIN:
10963 # rename LVs to the newly-generated names; we need to construct
10964 # 'fake' LV disks with the old data, plus the new unique_id
10965 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10967 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10968 rename_to.append(t_dsk.logical_id)
10969 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10970 self.cfg.SetDiskID(t_dsk, pnode_name)
10971 result = self.rpc.call_blockdev_rename(pnode_name,
10972 zip(tmp_disks, rename_to))
10973 result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might not have done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      else:
        if self.op.mode == constants.INSTANCE_IMPORT:
          feedback_fn("* running the instance OS import scripts...")

          transfers = []

          for idx, image in enumerate(self.src_images):
            if not image:
              continue

            # FIXME: pass debug option from opcode to backend
            dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                               constants.IEIO_FILE, (image, ),
                                               constants.IEIO_SCRIPT,
                                               (iobj.disks[idx], idx),
                                               None)
            transfers.append(dt)

          import_result = \
            masterd.instance.TransferInstanceData(self, feedback_fn,
                                                  self.op.src_node, pnode_name,
                                                  self.pnode.secondary_ip,
                                                  iobj, transfers)
          if not compat.all(import_result):
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self._old_instance_name

        elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
          feedback_fn("* preparing remote import...")
          # The source cluster will stop the instance before attempting to make
          # a connection. In some cases stopping an instance can take a long
          # time, hence the shutdown timeout is added to the connection
          # timeout.
          connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                             self.op.source_shutdown_timeout)
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          assert iobj.primary_node == self.pnode.name
          disk_results = \
            masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                          self.source_x509_ca,
                                          self._cds, timeouts)
          if not compat.all(disk_results):
            # TODO: Should the instance still be started, even if some disks
            # failed to import (valid for local imports, too)?
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self.source_instance_name

        else:
          # also checked in the prereq part
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                       % self.op.mode)

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   rename_from,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
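
# Hedged usage sketch (values are made up): instance creation is normally
# driven from the CLI, e.g.
#   gnt-instance add -t drbd -n node1:node2 -o debootstrap -s 10G inst1
# which is turned into an OpInstanceCreate opcode and ends up in the
# LUInstanceCreate.Exec method above.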


class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocators are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    _CheckOpportunisticLocking(self.op)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      # iallocator will select nodes and even if no iallocator is used,
      # collisions with LUInstanceCreate should be avoided
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    ec_id = self.proc.GetECId()

    if self.op.opportunistic_locking:
      # Only consider nodes for which a lock is held
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
    else:
      node_whitelist = None

    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, ec_id),
                                         _ComputeFullBeParams(op, cluster),
                                         node_whitelist)
             for op in self.op.instances]

    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.ia_result = ial.result

    if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    """
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])

    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
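
  # Hedged sketch of the partial result built above, assuming two requested
  # instances of which only one was allocatable (key names abbreviated):
  #   {ALLOCATABLE_KEY: ["inst1"], FAILED_KEY: ["inst2"]}
  # plus the job IDs added later by the opcode processor.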


def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
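
# Hedged sketch: the dictionary returned above is consumed by the client side
# of "gnt-instance console"; depending on the hypervisor it describes either
# a command to spawn or a network endpoint, e.g. (illustrative values only)
#   {"instance": "inst1", "kind": "ssh", "host": "node1", ...}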


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    remote_node = self.op.remote_node
    ialloc = self.op.iallocator
    if self.op.mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and ialloc is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

    elif remote_node is not None or ialloc is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
          [node_name
           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

        self._LockInstancesNodes()

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    req = iallocator.IAReqRelocate(name=instance_name,
                                   relocate_from=list(relocate_from))
    ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
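
  # Hedged example: for an IAReqRelocate request the allocator result is a
  # list of chosen node names, e.g. ["node3"]; only the first entry is used
  # above as the new secondary.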

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, instance)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between
    # internally submitted opcodes and external ones. We should fix that.
    if self.remote_node_info:
      # We change the node, let's verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              new_group_info)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    # Release any owned node group
    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
        ("Incorrect node locks, owning %s, expected %s" %
         (owned_nodes, self.node_secondary_ip.keys()))
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
        "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
        "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement for instance '%s'" %
                  self.instance.name)
      return

    feedback_fn("Replacing disk(s) %s for instance '%s'" %
                (utils.CommaJoin(self.disks), self.instance.name))
    feedback_fn("Current primary node: %s" % self.instance.primary_node)
    feedback_fn("Current secondary node: %s" %
                utils.CommaJoin(self.instance.secondary_nodes))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, self.instance)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
                                   on_primary, ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      (data_disk, meta_disk) = dev.children
      vg_data = data_disk.logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]),
                             params=data_disk.params)
      vg_meta = meta_disk.logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV,
                             size=constants.DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]),
                             params=meta_disk.params)

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False,
                             excl_stor)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = _BlockdevFind(self, node_name, dev, self.instance)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s", name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s", msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn):  # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
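
    # Hedged sketch of the per-disk rename dance below, with hypothetical LV
    # names:
    #   old LV  vg/inst1.disk0_data -> vg/inst1.disk0_data_replaced-<time_t>
    #   new LV  vg/<uuid>.disk0_data -> vg/inst1.disk0_data
    # so the freshly created LV ends up under the old name and can be attached
    # back to the drbd device.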

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node,
                                                  (dev, self.instance), new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
    for idx, dev in enumerate(disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
                             True, _GetInstanceInfoText(self.instance), False,
                             excl_stor)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params={})
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
                                             self.cfg)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              anno_new_drbd,
                              _GetInstanceInfoText(self.instance), False,
                              excl_stor)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node,
                                            (dev, self.instance)).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
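

# Hedged CLI sketch: the two DRBD strategies above are normally reached via
# "gnt-instance replace-disks", e.g.
#   gnt-instance replace-disks -p inst1        # on the primary (disk-only)
#   gnt-instance replace-disks -n node3 inst1  # change the secondary node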


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary node" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
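

# Hedged CLI sketch: node evacuation is typically requested as
#   gnt-node evacuate -s node1           # secondary instances via iallocator
#   gnt-node evacuate -s -n node2 node1  # explicit new secondary node
# which maps onto the NODE_EVAC_* modes handled above.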


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


def _DiskSizeInBytesToMebibytes(lu, size):
  """Converts a disk size in bytes to mebibytes.

  Warns and rounds up if the size isn't an even multiple of 1 MiB.

  """
  (mib, remainder) = divmod(size, 1024 * 1024)

  if remainder != 0:
    lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
                  " to not overwrite existing data (%s bytes will not be"
                  " wiped)", (1024 * 1024) - remainder)
    mib += 1

  return mib


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
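
  # Worked example: for a 10240 MiB disk, an absolute request of 12288 yields
  # delta = 12288 - 10240 = 2048, while a relative request of 2048 yields the
  # same target of 12288 MiB; a negative delta is rejected in both cases.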

  def _CheckDiskSpace(self, nodenames, req_vgspace):
    template = self.instance.disk_template
    if template not in constants.DTS_NO_FREE_SPACE_CHECK:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      nodes = map(self.cfg.GetNodeInfo, nodenames)
      es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
                        nodes)
      if es_nodes:
        # With exclusive storage we need to do something smarter than just
        # looking at free space; for now, let's simply abort the operation.
        raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
                                   " is enabled", errors.ECODE_STATE)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)
12766 disk.RecordGrow(self.delta)
12767 self.cfg.Update(instance, feedback_fn)
12769 # Changes have been recorded, release node lock
12770 _ReleaseLocks(self, locking.LEVEL_NODE)
12772 # Downgrade lock while waiting for sync
12773 self.glm.downgrade(locking.LEVEL_INSTANCE)
12775 assert wipe_disks ^ (old_disk_size is None)
12778 assert instance.disks[self.op.disk] == disk
12780 # Wipe newly added disk space
12781 _WipeDisks(self, instance,
12782 disks=[(self.op.disk, disk, old_disk_size)])
12784 if self.op.wait_for_sync:
12785 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12787 self.LogWarning("Disk syncing has not returned a good status; check"
12789 if instance.admin_state != constants.ADMINST_UP:
12790 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12791 elif instance.admin_state != constants.ADMINST_UP:
12792 self.LogWarning("Not shutting down the disk even if the instance is"
12793 " not supposed to be running because no wait for"
12794 " sync mode was requested")
12796 assert self.owned_locks(locking.LEVEL_NODE_RES)
12797 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
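
# Illustrative sketch (hypothetical values): PrepareContainerMods() only
# attaches the private data object to each modification, e.g.:
#   mods = [(constants.DDM_ADD, -1, {"size": 1024}),
#           (constants.DDM_REMOVE, 0, {})]
#   PrepareContainerMods(mods, _InstNicModPrivate)
#   # => [(DDM_ADD, -1, {...}, <_InstNicModPrivate>),
#   #     (DDM_REMOVE, 0, {}, <_InstNicModPrivate>)]
# With private_fn=None, the fourth element of each tuple is None.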


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
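
# Illustrative sketch: ApplyContainerMods() mutates the container in place
# and, when "chgdesc" is a list, extends it with the changes reported by the
# callbacks, e.g.:
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("demo", container, chgdesc, mods, None, None, None)
#   # container == ["b"], chgdesc == [("demo/0", "remove")]
# Every reported change is a (name, value) 2-tuple, which is what
# _TApplyContModsCbChanges above enforces.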


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )
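
# For example, _UpdateIvNames(0, disks) renumbers the volumes to "disk/0",
# "disk/1", ... in list order; a non-zero base index just shifts the start.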


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
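
  # Illustrative sketch: the legacy 2-tuple format is upgraded here, e.g.
  #   [("add", {...}), ("remove", {})]
  # becomes
  #   [(DDM_ADD, -1, {...}), (DDM_REMOVE, -1, {})]
  # while an index entry such as (2, {...}) turns into (DDM_MODIFY, 2, {...}).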

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If network is given, mode or link"
                                     " should not be set",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
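
  # Illustrative sketch (hypothetical values): for an add,
  #   {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1"}
  # is valid (ip=pool requires a network) and a missing MAC defaults to
  # "auto"; for a modify, mac="auto" is rejected since an existing NIC
  # already has a concrete address.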

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group in order to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(_NICToTuple(self, n))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    elif new_mode == constants.NIC_MODE_OVS:
      # TODO: check OVS link
      self.LogInfo("OVS links are currently not checked for correctness")

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:

      def get_net_prefix(net):
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())

    # if there is a change in the NIC/network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        if new_net:
          if new_ip.lower() == constants.NIC_IP_POOL:
            try:
              new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP"
                                         " from the address pool",
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
            params[constants.INIC_IP] = new_ip
          elif new_ip != old_ip or new_net != old_net:
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
        elif new_ip.lower() == constants.NIC_IP_POOL:
          raise errors.OpPrereqError("ip=pool, but no network found",
                                     errors.ECODE_INVAL)

        # new IP without a network: only check for conflicts if requested
        elif self.op.conflicts_check:
          _CheckForConflictingIp(self, new_ip, pnode)

      # release the old IP if it belonged to a network
      if old_ip and old_net:
        try:
          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net)

    # there are no changes in (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def _PreCheckDiskTemplate(self, pnode_info):
    """CheckPrereq checks related to a new disk template."""
    # Arguments are passed to avoid configuration lookups
    instance = self.instance
    pnode = instance.primary_node
    cluster = self.cluster
    if instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 instance.disk_template, errors.ECODE_INVAL)

    if (instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node == pnode:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      _CheckNodeOnline(self, self.op.remote_node)
      _CheckNodeNotDrained(self, self.op.remote_node)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      # Make sure none of the nodes require exclusive storage
      nodes = [pnode_info]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        assert snode_info
        nodes.append(snode_info)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
                  " storage is enabled" % (instance.disk_template,
                                           self.op.disk_template))
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Check disk modifications. This is done here and not in CheckArguments
    # (as with NICs), because we need to know the instance's disk template
    if instance.disk_template == constants.DT_EXT:
      self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # Check the validity of the `provider' parameter
    if instance.disk_template in constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and"
                                       " parameter '%s' missing, during"
                                       " disk add" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      self._PreCheckDiskTemplate(pnode_info)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor], False)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None and self.op.offline:
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
                          msg="can't change to offline")

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
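
  # Illustrative note: the ipolicy check above validates both extremes of the
  # memory range separately; e.g. with minmem=512 and maxmem=2048 the spec is
  # verified once with ISPEC_MEM_SIZE=512 and once with ISPEC_MEM_SIZE=2048,
  # so the whole range must fit the node group's instance policy.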

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
    s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True, p_excl_stor)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
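
  # Note for readers: a DRBD8 disk object has two LV children, children[0]
  # holding the data and children[1] the DRBD metadata; the conversion above
  # therefore promotes children[0] to a standalone plain disk and removes
  # the now-unneeded metadata volumes.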

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    net = params.get(constants.INIC_NETWORK, None)
    #TODO: not private.filled?? can a nic have no nicparams??
    nicparams = private.filled

    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK],
        net)),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled

      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance.name,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
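
  # Illustrative note: Exec() dispatches conversions through this table, e.g.
  # a plain-to-drbd request resolves to
  #   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
  # while unsupported pairs are rejected earlier by _PreCheckDiskTemplate.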


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
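
  # The ResultWithJobs value wraps a list of job definitions, each itself a
  # list of opcodes (e.g. [[op_migrate], [op_replace_disks]], hypothetical
  # names); the master processor submits them as separate jobs and includes
  # the resulting job IDs in this opcode's result.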


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
14414 class _ExportQuery(_QueryBase):
14415 FIELDS = query.EXPORT_FIELDS
14417 #: The node name is not a unique key for this query
14418 SORT_FIELD = "node"
14420 def ExpandNames(self, lu):
14421 lu.needed_locks = {}
14423 # The following variables interact with _QueryBase._GetNames
14424 if self.names:
14425 self.wanted = _GetWantedNodes(lu, self.names)
14426 else:
14427 self.wanted = locking.ALL_SET
14429 self.do_locking = self.use_locking
14431 if self.do_locking:
14432 lu.share_locks = _ShareAll()
14433 lu.needed_locks = {
14434 locking.LEVEL_NODE: self.wanted,
14435 }
14437 if not self.names:
14438 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14440 def DeclareLocks(self, lu, level):
14441 pass
14443 def _GetQueryData(self, lu):
14444 """Computes the list of nodes and their attributes.
14447 # Locking is not used
14449 assert not (compat.any(lu.glm.is_owned(level)
14450 for level in locking.LEVELS
14451 if level != locking.LEVEL_CLUSTER) or
14452 self.do_locking or self.use_locking)
14454 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14456 result = []
14458 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14459 if nres.fail_msg:
14460 result.append((node, None))
14461 else:
14462 result.extend((node, expname) for expname in nres.payload)
14464 return result
14467 class LUBackupPrepare(NoHooksLU):
14468 """Prepares an instance for an export and returns useful information.
14473 def ExpandNames(self):
14474 self._ExpandAndLockInstance()
14476 def CheckPrereq(self):
14477 """Check prerequisites.
14480 instance_name = self.op.instance_name
14482 self.instance = self.cfg.GetInstanceInfo(instance_name)
14483 assert self.instance is not None, \
14484 "Cannot retrieve locked instance %s" % self.op.instance_name
14485 _CheckNodeOnline(self, self.instance.primary_node)
14487 self._cds = _GetClusterDomainSecret()
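# Note: the cluster domain secret is shared by the clusters taking part in
# an inter-cluster instance move; below it keys the HMACs that sign the
# handshake, the X509 key name and the exported CA.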
14489 def Exec(self, feedback_fn):
14490 """Prepares an instance for an export.
14493 instance = self.instance
14495 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14496 salt = utils.GenerateSecret(8)
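# Note: the fresh salt is folded into the HMAC over the X509 key name below;
# LUBackupExport verifies that HMAC before trusting the key name.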
14498 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14499 result = self.rpc.call_x509_cert_create(instance.primary_node,
14500 constants.RIE_CERT_VALIDITY)
14501 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14503 (name, cert_pem) = result.payload
14505 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14506 cert_pem)
14508 return {
14509 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14510 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14511 salt),
14512 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14513 }
14515 return None
14518 class LUBackupExport(LogicalUnit):
14519 """Export an instance to an image in the cluster.
14522 HPATH = "instance-export"
14523 HTYPE = constants.HTYPE_INSTANCE
14526 def CheckArguments(self):
14527 """Check the arguments.
14530 self.x509_key_name = self.op.x509_key_name
14531 self.dest_x509_ca_pem = self.op.destination_x509_ca
14533 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14534 if not self.x509_key_name:
14535 raise errors.OpPrereqError("Missing X509 key name for encryption",
14536 errors.ECODE_INVAL)
14538 if not self.dest_x509_ca_pem:
14539 raise errors.OpPrereqError("Missing destination X509 CA",
14540 errors.ECODE_INVAL)
14542 def ExpandNames(self):
14543 self._ExpandAndLockInstance()
14545 # Lock all nodes for local exports
14546 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14547 # FIXME: lock only instance primary and destination node
14549 # Sad but true, for now we have to lock all nodes, as we don't know where
14550 # the previous export might be, and in this LU we search for it and
14551 # remove it from its current node. In the future we could fix this by:
14552 # - making a tasklet to search (share-lock all), then create the
14553 # new one, then one to remove, after
14554 # - removing the removal operation altogether
14555 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14557 # Allocations should be stopped while this LU runs with node locks, but
14558 # it doesn't have to be exclusive
14559 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14560 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14562 def DeclareLocks(self, level):
14563 """Last minute lock declaration."""
14564 # All nodes are locked anyway, so nothing to do here.
14566 def BuildHooksEnv(self):
14567 """Build hooks env.
14569 This will run on the master, primary node and target node.
14573 "EXPORT_MODE": self.op.mode,
14574 "EXPORT_NODE": self.op.target_node,
14575 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14576 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14577 # TODO: Generic function for boolean env variables
14578 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14581 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14585 def BuildHooksNodes(self):
14586 """Build hooks nodes.
14589 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14591 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14592 nl.append(self.op.target_node)
14596 def CheckPrereq(self):
14597 """Check prerequisites.
14599 This checks that the instance and node names are valid.
14601 """
14602 instance_name = self.op.instance_name
14604 self.instance = self.cfg.GetInstanceInfo(instance_name)
14605 assert self.instance is not None, \
14606 "Cannot retrieve locked instance %s" % self.op.instance_name
14607 _CheckNodeOnline(self, self.instance.primary_node)
14609 if (self.op.remove_instance and
14610 self.instance.admin_state == constants.ADMINST_UP and
14611 not self.op.shutdown):
14612 raise errors.OpPrereqError("Can not remove instance without shutting it"
14613 " down before", errors.ECODE_STATE)
14615 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14616 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14617 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14618 assert self.dst_node is not None
14620 _CheckNodeOnline(self, self.dst_node.name)
14621 _CheckNodeNotDrained(self, self.dst_node.name)
14624 self.dest_disk_info = None
14625 self.dest_x509_ca = None
14627 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14628 self.dst_node = None
14630 if len(self.op.target_node) != len(self.instance.disks):
14631 raise errors.OpPrereqError(("Received destination information for %s"
14632 " disks, but instance %s has %s disks") %
14633 (len(self.op.target_node), instance_name,
14634 len(self.instance.disks)),
14635 errors.ECODE_INVAL)
14637 cds = _GetClusterDomainSecret()
14639 # Check X509 key name
14640 try:
14641 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14642 except (TypeError, ValueError), err:
14643 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14644 errors.ECODE_INVAL)
14646 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14647 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14648 errors.ECODE_INVAL)
14650 # Load and verify CA
14651 try:
14652 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14653 except OpenSSL.crypto.Error, err:
14654 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14655 (err, ), errors.ECODE_INVAL)
14657 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14658 if errcode is not None:
14659 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14660 (msg, ), errors.ECODE_INVAL)
14662 self.dest_x509_ca = cert
14664 # Verify target information
14665 disk_info = []
14666 for idx, disk_data in enumerate(self.op.target_node):
14667 try:
14668 (host, port, magic) = \
14669 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14670 except errors.GenericError, err:
14671 raise errors.OpPrereqError("Target info for disk %s: %s" %
14672 (idx, err), errors.ECODE_INVAL)
14674 disk_info.append((host, port, magic))
14676 assert len(disk_info) == len(self.op.target_node)
14677 self.dest_disk_info = disk_info
14679 else:
14680 raise errors.ProgrammerError("Unhandled export mode %r" %
14681 self.op.mode)
14683 # instance disk type verification
14684 # TODO: Implement export support for file-based disks
14685 for disk in self.instance.disks:
14686 if disk.dev_type == constants.LD_FILE:
14687 raise errors.OpPrereqError("Export not supported for instances with"
14688 " file-based disks", errors.ECODE_INVAL)
14690 def _CleanupExports(self, feedback_fn):
14691 """Removes exports of current instance from all other nodes.
14693 If an instance in a cluster with nodes A..D was exported to node C, its
14694 exports will be removed from the nodes A, B and D.
14696 """
14697 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14699 nodelist = self.cfg.GetNodeList()
14700 nodelist.remove(self.dst_node.name)
14702 # on one-node clusters nodelist will be empty after the removal
14703 # if we proceed the backup would be removed because OpBackupQuery
14704 # substitutes an empty list with the full cluster node list.
14705 iname = self.instance.name
14706 if nodelist:
14707 feedback_fn("Removing old exports for instance %s" % iname)
14708 exportlist = self.rpc.call_export_list(nodelist)
14709 for node in exportlist:
14710 if exportlist[node].fail_msg:
14711 continue
14712 if iname in exportlist[node].payload:
14713 msg = self.rpc.call_export_remove(node, iname).fail_msg
14714 if msg:
14715 self.LogWarning("Could not remove older export for instance %s"
14716 " on node %s: %s", iname, node, msg)
14718 def Exec(self, feedback_fn):
14719 """Export an instance to an image in the cluster.
14722 assert self.op.mode in constants.EXPORT_MODES
14724 instance = self.instance
14725 src_node = instance.primary_node
14727 if self.op.shutdown:
14728 # shutdown the instance, but not the disks
14729 feedback_fn("Shutting down instance %s" % instance.name)
14730 result = self.rpc.call_instance_shutdown(src_node, instance,
14731 self.op.shutdown_timeout)
14732 # TODO: Maybe ignore failures if ignore_remove_failures is set
14733 result.Raise("Could not shutdown instance %s on"
14734 " node %s" % (instance.name, src_node))
14736 # set the disks ID correctly since call_instance_start needs the
14737 # correct drbd minor to create the symlinks
14738 for disk in instance.disks:
14739 self.cfg.SetDiskID(disk, src_node)
14741 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14743 if activate_disks:
14744 # Activate the instance disks if we're exporting a stopped instance
14745 feedback_fn("Activating disks for %s" % instance.name)
14746 _StartInstanceDisks(self, instance, None)
14748 try:
14749 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14750 instance)
14752 helper.CreateSnapshots()
14753 try:
14754 if (self.op.shutdown and
14755 instance.admin_state == constants.ADMINST_UP and
14756 not self.op.remove_instance):
14757 assert not activate_disks
14758 feedback_fn("Starting instance %s" % instance.name)
14759 result = self.rpc.call_instance_start(src_node,
14760 (instance, None, None), False)
14761 msg = result.fail_msg
14762 if msg:
14763 feedback_fn("Failed to start instance: %s" % msg)
14764 _ShutdownInstanceDisks(self, instance)
14765 raise errors.OpExecError("Could not start instance: %s" % msg)
14767 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14768 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14769 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14770 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14771 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
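# Note: only the connect timeout is overridden here; the remaining
# import/export timeouts presumably keep their defaults.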
14773 (key_name, _, _) = self.x509_key_name
14775 dest_ca_pem = \
14776 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14777 self.dest_x509_ca)
14779 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14780 key_name, dest_ca_pem,
14781 timeouts)
14783 finally:
14784 helper.Cleanup()
14785 # Check for backwards compatibility
14786 assert len(dresults) == len(instance.disks)
14787 assert compat.all(isinstance(i, bool) for i in dresults), \
14788 "Not all results are boolean: %r" % dresults
14792 feedback_fn("Deactivating disks for %s" % instance.name)
14793 _ShutdownInstanceDisks(self, instance)
14795 if not (compat.all(dresults) and fin_resu):
14796 failures = []
14797 if not fin_resu:
14798 failures.append("export finalization")
14799 if not compat.all(dresults):
14800 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14801 if not dsk)
14802 failures.append("disk export: disk(s) %s" % fdsk)
14804 raise errors.OpExecError("Export failed, errors in %s" %
14805 utils.CommaJoin(failures))
14807 # At this point, the export was successful, we can cleanup/finish
14809 # Remove instance if requested
14810 if self.op.remove_instance:
14811 feedback_fn("Removing instance %s" % instance.name)
14812 _RemoveInstance(self, feedback_fn, instance,
14813 self.op.ignore_remove_failures)
14815 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14816 self._CleanupExports(feedback_fn)
14818 return fin_resu, dresults
14821 class LUBackupRemove(NoHooksLU):
14822 """Remove exports related to the named instance.
14827 def ExpandNames(self):
14828 self.needed_locks = {
14829 # We need all nodes to be locked in order for RemoveExport to work, but
14830 # we don't need to lock the instance itself, as nothing will happen to it
14831 # (and we can remove exports also for a removed instance)
14832 locking.LEVEL_NODE: locking.ALL_SET,
14834 # Removing backups is quick, so blocking allocations is justified
14835 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14836 }
14838 # Allocations should be stopped while this LU runs with node locks, but it
14839 # doesn't have to be exclusive
14840 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14842 def Exec(self, feedback_fn):
14843 """Remove any export.
14846 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14847 # If the instance was not found we'll try with the name that was passed in.
14848 # This will only work if it was an FQDN, though.
14850 if not instance_name:
14852 instance_name = self.op.instance_name
14854 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14855 exportlist = self.rpc.call_export_list(locked_nodes)
14856 found = False
14857 for node in exportlist:
14858 msg = exportlist[node].fail_msg
14859 if msg:
14860 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14861 continue
14862 if instance_name in exportlist[node].payload:
14863 found = True
14864 result = self.rpc.call_export_remove(node, instance_name)
14865 msg = result.fail_msg
14866 if msg:
14867 logging.error("Could not remove export for instance %s"
14868 " on node %s: %s", instance_name, node, msg)
14870 if fqdn_warn and not found:
14871 feedback_fn("Export not found. If trying to remove an export belonging"
14872 " to a deleted instance please use its Fully Qualified"
14876 class LUGroupAdd(LogicalUnit):
14877 """Logical unit for creating node groups.
14880 HPATH = "group-add"
14881 HTYPE = constants.HTYPE_GROUP
14884 def ExpandNames(self):
14885 # We need the new group's UUID here so that we can create and acquire the
14886 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14887 # that it should not check whether the UUID exists in the configuration.
14888 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14889 self.needed_locks = {}
14890 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14892 def CheckPrereq(self):
14893 """Check prerequisites.
14895 This checks that the given group name is not an existing node group
14896 already.
14898 """
14899 try:
14900 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14901 except errors.OpPrereqError:
14902 pass
14903 else:
14904 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14905 " node group (UUID: %s)" %
14906 (self.op.group_name, existing_uuid),
14907 errors.ECODE_EXISTS)
14909 if self.op.ndparams:
14910 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14912 if self.op.hv_state:
14913 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14914 else:
14915 self.new_hv_state = None
14917 if self.op.disk_state:
14918 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14919 else:
14920 self.new_disk_state = None
14922 if self.op.diskparams:
14923 for templ in constants.DISK_TEMPLATES:
14924 if templ in self.op.diskparams:
14925 utils.ForceDictType(self.op.diskparams[templ],
14926 constants.DISK_DT_TYPES)
14927 self.new_diskparams = self.op.diskparams
14928 try:
14929 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14930 except errors.OpPrereqError, err:
14931 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14932 errors.ECODE_INVAL)
14933 else:
14934 self.new_diskparams = {}
14936 if self.op.ipolicy:
14937 cluster = self.cfg.GetClusterInfo()
14938 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14939 try:
14940 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14941 except errors.ConfigurationError, err:
14942 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14943 errors.ECODE_INVAL)
14945 def BuildHooksEnv(self):
14946 """Build hooks env.
14950 "GROUP_NAME": self.op.group_name,
14953 def BuildHooksNodes(self):
14954 """Build hooks nodes.
14957 mn = self.cfg.GetMasterNode()
14958 return ([mn], [mn])
14960 def Exec(self, feedback_fn):
14961 """Add the node group to the cluster.
14964 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14965 uuid=self.group_uuid,
14966 alloc_policy=self.op.alloc_policy,
14967 ndparams=self.op.ndparams,
14968 diskparams=self.new_diskparams,
14969 ipolicy=self.op.ipolicy,
14970 hv_state_static=self.new_hv_state,
14971 disk_state_static=self.new_disk_state)
14973 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
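# A lock added through add_locks would normally be removed again once the LU
# finishes; since the group now exists in the configuration, drop it from
# remove_locks below so the lock is kept.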
14974 del self.remove_locks[locking.LEVEL_NODEGROUP]
14977 class LUGroupAssignNodes(NoHooksLU):
14978 """Logical unit for assigning nodes to groups.
14983 def ExpandNames(self):
14984 # These raise errors.OpPrereqError on their own:
14985 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14986 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14988 # We want to lock all the affected nodes and groups. We have readily
14989 # available the list of nodes, and the *destination* group. To gather the
14990 # list of "source" groups, we need to fetch node information later on.
14991 self.needed_locks = {
14992 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14993 locking.LEVEL_NODE: self.op.nodes,
14994 }
14996 def DeclareLocks(self, level):
14997 if level == locking.LEVEL_NODEGROUP:
14998 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15000 # Try to get all affected nodes' groups without having the group or node
15001 # lock yet. Needs verification later in the code flow.
15002 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15004 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15006 def CheckPrereq(self):
15007 """Check prerequisites.
15010 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15011 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15012 frozenset(self.op.nodes))
15014 expected_locks = (set([self.group_uuid]) |
15015 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15016 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15017 if actual_locks != expected_locks:
15018 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15019 " current groups are '%s', used to be '%s'" %
15020 (utils.CommaJoin(expected_locks),
15021 utils.CommaJoin(actual_locks)))
15023 self.node_data = self.cfg.GetAllNodesInfo()
15024 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15025 instance_data = self.cfg.GetAllInstancesInfo()
15027 if self.group is None:
15028 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15029 (self.op.group_name, self.group_uuid))
15031 (new_splits, previous_splits) = \
15032 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15033 for node in self.op.nodes],
15034 self.node_data, instance_data)
15036 if new_splits:
15037 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15039 if not self.op.force:
15040 raise errors.OpExecError("The following instances get split by this"
15041 " change and --force was not given: %s" %
15042 fmt_new_splits)
15043 else:
15044 self.LogWarning("This operation will split the following instances: %s",
15045 fmt_new_splits)
15047 if previous_splits:
15048 self.LogWarning("In addition, these already-split instances continue"
15049 " to be split across groups: %s",
15050 utils.CommaJoin(utils.NiceSort(previous_splits)))
15052 def Exec(self, feedback_fn):
15053 """Assign nodes to a new group.
15056 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15058 self.cfg.AssignGroupNodes(mods)
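# AssignGroupNodes applies all node-to-group changes in one configuration
# update, matching the atomic view that CheckAssignmentForSplitInstances
# (used in CheckPrereq above) takes of the same change set.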
15060 @staticmethod
15061 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15062 """Check for split instances after a node assignment.
15064 This method considers a series of node assignments as an atomic operation,
15065 and returns information about split instances after applying the set of
15066 changes.
15068 In particular, it returns information about newly split instances, and
15069 instances that were already split, and remain so after the change.
15071 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15072 considered.
15074 @type changes: list of (node_name, new_group_uuid) pairs.
15075 @param changes: list of node assignments to consider.
15076 @param node_data: a dict with data for all nodes
15077 @param instance_data: a dict with all instances to consider
15078 @rtype: a two-tuple
15079 @return: a list of instances that were previously okay and result split as a
15080 consequence of this change, and a list of instances that were previously
15081 split and this change does not fix.
15083 """
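# Illustrative example: a DRBD instance on (node1, node2), both in group
# "grp1". Reassigning node2 to "grp2" makes the instance newly split, so it
# is returned in the first list; had it already spanned two groups before
# the change, it would show up in the second list instead.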
15084 changed_nodes = dict((node, group) for node, group in changes
15085 if node_data[node].group != group)
15087 all_split_instances = set()
15088 previously_split_instances = set()
15090 def InstanceNodes(instance):
15091 return [instance.primary_node] + list(instance.secondary_nodes)
15093 for inst in instance_data.values():
15094 if inst.disk_template not in constants.DTS_INT_MIRROR:
15095 continue
15097 instance_nodes = InstanceNodes(inst)
15099 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15100 previously_split_instances.add(inst.name)
15102 if len(set(changed_nodes.get(node, node_data[node].group)
15103 for node in instance_nodes)) > 1:
15104 all_split_instances.add(inst.name)
15106 return (list(all_split_instances - previously_split_instances),
15107 list(previously_split_instances & all_split_instances))
15110 class _GroupQuery(_QueryBase):
15111 FIELDS = query.GROUP_FIELDS
15113 def ExpandNames(self, lu):
15114 lu.needed_locks = {}
15116 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15117 self._cluster = lu.cfg.GetClusterInfo()
15118 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15120 if not self.names:
15121 self.wanted = [name_to_uuid[name]
15122 for name in utils.NiceSort(name_to_uuid.keys())]
15123 else:
15124 # Accept names to be either names or UUIDs.
15125 missing = []
15126 self.wanted = []
15127 all_uuid = frozenset(self._all_groups.keys())
15129 for name in self.names:
15130 if name in all_uuid:
15131 self.wanted.append(name)
15132 elif name in name_to_uuid:
15133 self.wanted.append(name_to_uuid[name])
15134 else:
15135 missing.append(name)
15137 if missing:
15138 raise errors.OpPrereqError("Some groups do not exist: %s" %
15139 utils.CommaJoin(missing),
15140 errors.ECODE_NOENT)
15142 def DeclareLocks(self, lu, level):
15143 pass
15145 def _GetQueryData(self, lu):
15146 """Computes the list of node groups and their attributes.
15149 do_nodes = query.GQ_NODE in self.requested_data
15150 do_instances = query.GQ_INST in self.requested_data
15152 group_to_nodes = None
15153 group_to_instances = None
15155 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15156 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15157 # latter GetAllInstancesInfo() is not enough, for we have to go through
15158 # instance->node. Hence, we will need to process nodes even if we only need
15159 # instance information.
15160 if do_nodes or do_instances:
15161 all_nodes = lu.cfg.GetAllNodesInfo()
15162 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15163 node_to_group = {}
15165 for node in all_nodes.values():
15166 if node.group in group_to_nodes:
15167 group_to_nodes[node.group].append(node.name)
15168 node_to_group[node.name] = node.group
15170 if do_instances:
15171 all_instances = lu.cfg.GetAllInstancesInfo()
15172 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15174 for instance in all_instances.values():
15175 node = instance.primary_node
15176 if node in node_to_group:
15177 group_to_instances[node_to_group[node]].append(instance.name)
15179 if not do_nodes:
15180 # Do not pass on node information if it was not requested.
15181 group_to_nodes = None
15183 return query.GroupQueryData(self._cluster,
15184 [self._all_groups[uuid]
15185 for uuid in self.wanted],
15186 group_to_nodes, group_to_instances,
15187 query.GQ_DISKPARAMS in self.requested_data)
15190 class LUGroupQuery(NoHooksLU):
15191 """Logical unit for querying node groups.
15196 def CheckArguments(self):
15197 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15198 self.op.output_fields, False)
15200 def ExpandNames(self):
15201 self.gq.ExpandNames(self)
15203 def DeclareLocks(self, level):
15204 self.gq.DeclareLocks(self, level)
15206 def Exec(self, feedback_fn):
15207 return self.gq.OldStyleQuery(self)
15210 class LUGroupSetParams(LogicalUnit):
15211 """Modifies the parameters of a node group.
15214 HPATH = "group-modify"
15215 HTYPE = constants.HTYPE_GROUP
15218 def CheckArguments(self):
15219 all_changes = [
15220 self.op.ndparams,
15221 self.op.diskparams,
15222 self.op.alloc_policy,
15223 self.op.hv_state,
15224 self.op.disk_state,
15225 self.op.ipolicy,
15226 ]
15228 if all_changes.count(None) == len(all_changes):
15229 raise errors.OpPrereqError("Please pass at least one modification",
15230 errors.ECODE_INVAL)
15232 def ExpandNames(self):
15233 # This raises errors.OpPrereqError on its own:
15234 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15236 self.needed_locks = {
15237 locking.LEVEL_INSTANCE: [],
15238 locking.LEVEL_NODEGROUP: [self.group_uuid],
15239 }
15241 self.share_locks[locking.LEVEL_INSTANCE] = 1
15243 def DeclareLocks(self, level):
15244 if level == locking.LEVEL_INSTANCE:
15245 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15247 # Lock instances optimistically, needs verification once group lock has
15248 # been acquired
15249 self.needed_locks[locking.LEVEL_INSTANCE] = \
15250 self.cfg.GetNodeGroupInstances(self.group_uuid)
15252 @staticmethod
15253 def _UpdateAndVerifyDiskParams(old, new):
15254 """Updates and verifies disk parameters.
15256 """
15257 new_params = _GetUpdatedParams(old, new)
15258 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15259 return new_params
15261 def CheckPrereq(self):
15262 """Check prerequisites.
15265 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15267 # Check if locked instances are still correct
15268 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15270 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15271 cluster = self.cfg.GetClusterInfo()
15273 if self.group is None:
15274 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15275 (self.op.group_name, self.group_uuid))
15277 if self.op.ndparams:
15278 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15279 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15280 self.new_ndparams = new_ndparams
15282 if self.op.diskparams:
15283 diskparams = self.group.diskparams
15284 uavdp = self._UpdateAndVerifyDiskParams
15285 # For each disktemplate subdict update and verify the values
15286 new_diskparams = dict((dt,
15287 uavdp(diskparams.get(dt, {}),
15288 self.op.diskparams[dt]))
15289 for dt in constants.DISK_TEMPLATES
15290 if dt in self.op.diskparams)
15291 # Now that all subdicts of diskparams are updated, merge them with the
15292 # base dict to obtain the full set of disk parameters
15293 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15294 try:
15295 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15296 except errors.OpPrereqError, err:
15297 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15298 errors.ECODE_INVAL)
15300 if self.op.hv_state:
15301 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15302 self.group.hv_state_static)
15304 if self.op.disk_state:
15305 self.new_disk_state = \
15306 _MergeAndVerifyDiskState(self.op.disk_state,
15307 self.group.disk_state_static)
15309 if self.op.ipolicy:
15310 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15311 self.op.ipolicy,
15312 group_policy=True)
15314 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15315 inst_filter = lambda inst: inst.name in owned_instances
15316 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15317 gmi = ganeti.masterd.instance
15318 violations = \
15319 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15320 self.group),
15321 new_ipolicy, instances)
15323 if violations:
15324 self.LogWarning("After the ipolicy change the following instances"
15325 " violate them: %s",
15326 utils.CommaJoin(violations))
15328 def BuildHooksEnv(self):
15329 """Build hooks env.
15333 "GROUP_NAME": self.op.group_name,
15334 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15337 def BuildHooksNodes(self):
15338 """Build hooks nodes.
15341 mn = self.cfg.GetMasterNode()
15342 return ([mn], [mn])
15344 def Exec(self, feedback_fn):
15345 """Modifies the node group.
15350 if self.op.ndparams:
15351 self.group.ndparams = self.new_ndparams
15352 result.append(("ndparams", str(self.group.ndparams)))
15354 if self.op.diskparams:
15355 self.group.diskparams = self.new_diskparams
15356 result.append(("diskparams", str(self.group.diskparams)))
15358 if self.op.alloc_policy:
15359 self.group.alloc_policy = self.op.alloc_policy
15361 if self.op.hv_state:
15362 self.group.hv_state_static = self.new_hv_state
15364 if self.op.disk_state:
15365 self.group.disk_state_static = self.new_disk_state
15367 if self.op.ipolicy:
15368 self.group.ipolicy = self.new_ipolicy
15370 self.cfg.Update(self.group, feedback_fn)
15372 return result
15374 class LUGroupRemove(LogicalUnit):
15375 HPATH = "group-remove"
15376 HTYPE = constants.HTYPE_GROUP
15377 REQ_BGL = False
15379 def ExpandNames(self):
15380 # This raises errors.OpPrereqError on its own:
15381 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15382 self.needed_locks = {
15383 locking.LEVEL_NODEGROUP: [self.group_uuid],
15384 }
15386 def CheckPrereq(self):
15387 """Check prerequisites.
15389 This checks that the given group name exists as a node group, that it is
15390 empty (i.e., contains no nodes), and that it is not the last group of the
15391 cluster.
15393 """
15394 # Verify that the group is empty.
15395 group_nodes = [node.name
15396 for node in self.cfg.GetAllNodesInfo().values()
15397 if node.group == self.group_uuid]
15400 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15402 (self.op.group_name,
15403 utils.CommaJoin(utils.NiceSort(group_nodes))),
15404 errors.ECODE_STATE)
15406 # Verify the cluster would not be left group-less.
15407 if len(self.cfg.GetNodeGroupList()) == 1:
15408 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15409 " removed" % self.op.group_name,
15410 errors.ECODE_STATE)
15412 def BuildHooksEnv(self):
15413 """Build hooks env.
15417 "GROUP_NAME": self.op.group_name,
15420 def BuildHooksNodes(self):
15421 """Build hooks nodes.
15424 mn = self.cfg.GetMasterNode()
15425 return ([mn], [mn])
15427 def Exec(self, feedback_fn):
15428 """Remove the node group.
15432 self.cfg.RemoveNodeGroup(self.group_uuid)
15433 except errors.ConfigurationError:
15434 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15435 (self.op.group_name, self.group_uuid))
15437 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15440 class LUGroupRename(LogicalUnit):
15441 HPATH = "group-rename"
15442 HTYPE = constants.HTYPE_GROUP
15443 REQ_BGL = False
15445 def ExpandNames(self):
15446 # This raises errors.OpPrereqError on its own:
15447 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15449 self.needed_locks = {
15450 locking.LEVEL_NODEGROUP: [self.group_uuid],
15453 def CheckPrereq(self):
15454 """Check prerequisites.
15456 Ensures requested new name is not yet used.
15458 """
15459 try:
15460 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15461 except errors.OpPrereqError:
15462 pass
15463 else:
15464 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15465 " node group (UUID: %s)" %
15466 (self.op.new_name, new_name_uuid),
15467 errors.ECODE_EXISTS)
15469 def BuildHooksEnv(self):
15470 """Build hooks env.
15474 "OLD_NAME": self.op.group_name,
15475 "NEW_NAME": self.op.new_name,
15478 def BuildHooksNodes(self):
15479 """Build hooks nodes.
15482 mn = self.cfg.GetMasterNode()
15484 all_nodes = self.cfg.GetAllNodesInfo()
15485 all_nodes.pop(mn, None)
15487 run_nodes = [mn]
15488 run_nodes.extend(node.name for node in all_nodes.values()
15489 if node.group == self.group_uuid)
15491 return (run_nodes, run_nodes)
15493 def Exec(self, feedback_fn):
15494 """Rename the node group.
15497 group = self.cfg.GetNodeGroup(self.group_uuid)
15500 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15501 (self.op.group_name, self.group_uuid))
15503 group.name = self.op.new_name
15504 self.cfg.Update(group, feedback_fn)
15506 return self.op.new_name
15509 class LUGroupEvacuate(LogicalUnit):
15510 HPATH = "group-evacuate"
15511 HTYPE = constants.HTYPE_GROUP
15512 REQ_BGL = False
15514 def ExpandNames(self):
15515 # This raises errors.OpPrereqError on its own:
15516 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15518 if self.op.target_groups:
15519 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15520 self.op.target_groups)
15521 else:
15522 self.req_target_uuids = []
15524 if self.group_uuid in self.req_target_uuids:
15525 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15526 " as a target group (targets are %s)" %
15528 utils.CommaJoin(self.req_target_uuids)),
15529 errors.ECODE_INVAL)
15531 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
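# Note: _GetDefaultIAllocator falls back to the cluster's default
# iallocator when the opcode does not name one.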
15533 self.share_locks = _ShareAll()
15534 self.needed_locks = {
15535 locking.LEVEL_INSTANCE: [],
15536 locking.LEVEL_NODEGROUP: [],
15537 locking.LEVEL_NODE: [],
15538 }
15540 def DeclareLocks(self, level):
15541 if level == locking.LEVEL_INSTANCE:
15542 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15544 # Lock instances optimistically, needs verification once node and group
15545 # locks have been acquired
15546 self.needed_locks[locking.LEVEL_INSTANCE] = \
15547 self.cfg.GetNodeGroupInstances(self.group_uuid)
15549 elif level == locking.LEVEL_NODEGROUP:
15550 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15552 if self.req_target_uuids:
15553 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15555 # Lock all groups used by instances optimistically; this requires going
15556 # via the node before it's locked, requiring verification later on
15557 lock_groups.update(group_uuid
15558 for instance_name in
15559 self.owned_locks(locking.LEVEL_INSTANCE)
15560 for group_uuid in
15561 self.cfg.GetInstanceNodeGroups(instance_name))
15562 else:
15563 # No target groups, need to lock all of them
15564 lock_groups = locking.ALL_SET
15566 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15568 elif level == locking.LEVEL_NODE:
15569 # This will only lock the nodes in the group to be evacuated which
15570 # contain actual instances
15571 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15572 self._LockInstancesNodes()
15574 # Lock all nodes in group to be evacuated and target groups
15575 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15576 assert self.group_uuid in owned_groups
15577 member_nodes = [node_name
15578 for group in owned_groups
15579 for node_name in self.cfg.GetNodeGroup(group).members]
15580 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15582 def CheckPrereq(self):
15583 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15584 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15585 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15587 assert owned_groups.issuperset(self.req_target_uuids)
15588 assert self.group_uuid in owned_groups
15590 # Check if locked instances are still correct
15591 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15593 # Get instance information
15594 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15596 # Check if node groups for locked instances are still correct
15597 _CheckInstancesNodeGroups(self.cfg, self.instances,
15598 owned_groups, owned_nodes, self.group_uuid)
15600 if self.req_target_uuids:
15601 # User requested specific target groups
15602 self.target_uuids = self.req_target_uuids
15604 # All groups except the one to be evacuated are potential targets
15605 self.target_uuids = [group_uuid for group_uuid in owned_groups
15606 if group_uuid != self.group_uuid]
15608 if not self.target_uuids:
15609 raise errors.OpPrereqError("There are no possible target groups",
15610 errors.ECODE_INVAL)
15612 def BuildHooksEnv(self):
15613 """Build hooks env.
15617 "GROUP_NAME": self.op.group_name,
15618 "TARGET_GROUPS": " ".join(self.target_uuids),
15621 def BuildHooksNodes(self):
15622 """Build hooks nodes.
15625 mn = self.cfg.GetMasterNode()
15627 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15629 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15631 return (run_nodes, run_nodes)
15633 def Exec(self, feedback_fn):
15634 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15636 assert self.group_uuid not in self.target_uuids
15638 req = iallocator.IAReqGroupChange(instances=instances,
15639 target_groups=self.target_uuids)
15640 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15642 ial.Run(self.op.iallocator)
15644 if not ial.success:
15645 raise errors.OpPrereqError("Can't compute group evacuation using"
15646 " iallocator '%s': %s" %
15647 (self.op.iallocator, ial.info),
15648 errors.ECODE_NORES)
15650 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15652 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15653 len(jobs), self.op.group_name)
15655 return ResultWithJobs(jobs)
15658 class TagsLU(NoHooksLU): # pylint: disable=W0223
15659 """Generic tags LU.
15661 This is an abstract class which is the parent of all the other tags LUs.
15663 """
15664 def ExpandNames(self):
15665 self.group_uuid = None
15666 self.needed_locks = {}
15668 if self.op.kind == constants.TAG_NODE:
15669 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15670 lock_level = locking.LEVEL_NODE
15671 lock_name = self.op.name
15672 elif self.op.kind == constants.TAG_INSTANCE:
15673 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15674 lock_level = locking.LEVEL_INSTANCE
15675 lock_name = self.op.name
15676 elif self.op.kind == constants.TAG_NODEGROUP:
15677 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15678 lock_level = locking.LEVEL_NODEGROUP
15679 lock_name = self.group_uuid
15680 elif self.op.kind == constants.TAG_NETWORK:
15681 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15682 lock_level = locking.LEVEL_NETWORK
15683 lock_name = self.network_uuid
15684 else:
15685 lock_level = None
15686 lock_name = None
15688 if lock_level and getattr(self.op, "use_locking", True):
15689 self.needed_locks[lock_level] = lock_name
15691 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15692 # not possible to acquire the BGL based on opcode parameters)
15694 def CheckPrereq(self):
15695 """Check prerequisites.
15698 if self.op.kind == constants.TAG_CLUSTER:
15699 self.target = self.cfg.GetClusterInfo()
15700 elif self.op.kind == constants.TAG_NODE:
15701 self.target = self.cfg.GetNodeInfo(self.op.name)
15702 elif self.op.kind == constants.TAG_INSTANCE:
15703 self.target = self.cfg.GetInstanceInfo(self.op.name)
15704 elif self.op.kind == constants.TAG_NODEGROUP:
15705 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15706 elif self.op.kind == constants.TAG_NETWORK:
15707 self.target = self.cfg.GetNetwork(self.network_uuid)
15709 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15710 str(self.op.kind), errors.ECODE_INVAL)
15713 class LUTagsGet(TagsLU):
15714 """Returns the tags of a given object.
15719 def ExpandNames(self):
15720 TagsLU.ExpandNames(self)
15722 # Share locks as this is only a read operation
15723 self.share_locks = _ShareAll()
15725 def Exec(self, feedback_fn):
15726 """Returns the tag list.
15729 return list(self.target.GetTags())
15732 class LUTagsSearch(NoHooksLU):
15733 """Searches the tags for a given pattern.
15738 def ExpandNames(self):
15739 self.needed_locks = {}
15741 def CheckPrereq(self):
15742 """Check prerequisites.
15744 This checks the pattern passed for validity by compiling it.
15746 """
15747 try:
15748 self.re = re.compile(self.op.pattern)
15749 except re.error, err:
15750 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15751 (self.op.pattern, err), errors.ECODE_INVAL)
15753 def Exec(self, feedback_fn):
15754 """Returns the tag list.
15758 tgts = [("/cluster", cfg.GetClusterInfo())]
15759 ilist = cfg.GetAllInstancesInfo().values()
15760 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15761 nlist = cfg.GetAllNodesInfo().values()
15762 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15763 tgts.extend(("/nodegroup/%s" % n.name, n)
15764 for n in cfg.GetAllNodeGroupsInfo().values())
15765 results = []
15766 for path, target in tgts:
15767 for tag in target.GetTags():
15768 if self.re.search(tag):
15769 results.append((path, tag))
15771 return results
15773 class LUTagsSet(TagsLU):
15774 """Sets a tag on a given object.
15779 def CheckPrereq(self):
15780 """Check prerequisites.
15782 This checks the type and length of the tag name and value.
15784 """
15785 TagsLU.CheckPrereq(self)
15786 for tag in self.op.tags:
15787 objects.TaggableObject.ValidateTag(tag)
15789 def Exec(self, feedback_fn):
15790 """Sets the tag.
15792 """
15793 try:
15794 for tag in self.op.tags:
15795 self.target.AddTag(tag)
15796 except errors.TagError, err:
15797 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15798 self.cfg.Update(self.target, feedback_fn)
15801 class LUTagsDel(TagsLU):
15802 """Delete a list of tags from a given object.
15807 def CheckPrereq(self):
15808 """Check prerequisites.
15810 This checks that we have the given tag.
15812 """
15813 TagsLU.CheckPrereq(self)
15814 for tag in self.op.tags:
15815 objects.TaggableObject.ValidateTag(tag)
15816 del_tags = frozenset(self.op.tags)
15817 cur_tags = self.target.GetTags()
15819 diff_tags = del_tags - cur_tags
15820 if diff_tags:
15821 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15822 raise errors.OpPrereqError("Tag(s) %s not found" %
15823 (utils.CommaJoin(diff_names), ),
15824 errors.ECODE_NOENT)
15826 def Exec(self, feedback_fn):
15827 """Remove the tag from the object.
15830 for tag in self.op.tags:
15831 self.target.RemoveTag(tag)
15832 self.cfg.Update(self.target, feedback_fn)
15835 class LUTestDelay(NoHooksLU):
15836 """Sleep for a specified amount of time.
15838 This LU sleeps on the master and/or nodes for a specified amount of
15839 time.
15841 """
15842 REQ_BGL = False
15844 def ExpandNames(self):
15845 """Expand names and set required locks.
15847 This expands the node list, if any.
15849 """
15850 self.needed_locks = {}
15851 if self.op.on_nodes:
15852 # _GetWantedNodes can be used here, but is not always appropriate to use
15853 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15854 # more information.
15855 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15856 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15858 def _TestDelay(self):
15859 """Do the actual sleep.
15862 if self.op.on_master:
15863 if not utils.TestDelay(self.op.duration):
15864 raise errors.OpExecError("Error during master delay test")
15865 if self.op.on_nodes:
15866 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15867 for node, node_result in result.items():
15868 node_result.Raise("Failure during rpc call to node %s" % node)
15870 def Exec(self, feedback_fn):
15871 """Execute the test delay opcode, with the wanted repetitions.
15874 if self.op.repeat == 0:
15875 self._TestDelay()
15876 else:
15877 top_value = self.op.repeat - 1
15878 for i in range(self.op.repeat):
15879 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15880 self._TestDelay()
15883 class LURestrictedCommand(NoHooksLU):
15884 """Logical unit for executing restricted commands.
15889 def ExpandNames(self):
15890 if self.op.nodes:
15891 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15893 self.needed_locks = {
15894 locking.LEVEL_NODE: self.op.nodes,
15895 }
15896 self.share_locks = {
15897 locking.LEVEL_NODE: not self.op.use_locking,
15898 }
15900 def CheckPrereq(self):
15901 """Check prerequisites.
15905 def Exec(self, feedback_fn):
15906 """Execute restricted command and return output.
15909 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15911 # Check if correct locks are held
15912 assert set(self.op.nodes).issubset(owned_nodes)
15914 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15916 result = []
15918 for node_name in self.op.nodes:
15919 nres = rpcres[node_name]
15920 if nres.fail_msg:
15921 msg = ("Command '%s' on node '%s' failed: %s" %
15922 (self.op.command, node_name, nres.fail_msg))
15923 result.append((False, msg))
15924 else:
15925 result.append((True, nres.payload))
15927 return result
15930 class LUTestJqueue(NoHooksLU):
15931 """Utility LU to test some aspects of the job queue.
15936 # Must be lower than default timeout for WaitForJobChange to see whether it
15937 # notices changed jobs
15938 _CLIENT_CONNECT_TIMEOUT = 20.0
15939 _CLIENT_CONFIRM_TIMEOUT = 60.0
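# Timeout for the client to confirm a notification, used by
# _NotifyUsingSocket below while waiting on the accepted connection.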
15941 @classmethod
15942 def _NotifyUsingSocket(cls, cb, errcls):
15943 """Opens a Unix socket and waits for another program to connect.
15945 @type cb: callable
15946 @param cb: Callback to send socket name to client
15947 @type errcls: class
15948 @param errcls: Exception class to use for errors
15950 """
15951 # Using a temporary directory as there's no easy way to create temporary
15952 # sockets without writing a custom loop around tempfile.mktemp and
15953 # socket.bind
15954 tmpdir = tempfile.mkdtemp()
15955 try:
15956 tmpsock = utils.PathJoin(tmpdir, "sock")
15958 logging.debug("Creating temporary socket at %s", tmpsock)
15959 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15960 try:
15961 sock.bind(tmpsock)
15962 sock.listen(1)
15964 # Send details to client
15965 cb(tmpsock)
15967 # Wait for client to connect before continuing
15968 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15969 try:
15970 (conn, _) = sock.accept()
15971 except socket.error, err:
15972 raise errcls("Client didn't connect in time (%s)" % err)
15973 finally:
15974 sock.close()
15975 finally:
15976 # Remove as soon as client is connected
15977 shutil.rmtree(tmpdir)
15979 # Wait for client to close
15980 try:
15981 try:
15982 # pylint: disable=E1101
15983 # Instance of '_socketobject' has no ... member
15984 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15985 conn.recv(1)
15986 except socket.error, err:
15987 raise errcls("Client failed to confirm notification (%s)" % err)
15988 finally:
15989 conn.close()
15991 def _SendNotification(self, test, arg, sockname):
15992 """Sends a notification to the client.
15994 @type test: string
15995 @param test: Test name
15996 @param arg: Test argument (depends on test)
15997 @type sockname: string
15998 @param sockname: Socket path
16000 """
16001 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16003 def _Notify(self, prereq, test, arg):
16004 """Notifies the client of a test.
16006 @type prereq: bool
16007 @param prereq: Whether this is a prereq-phase test
16008 @type test: string
16009 @param test: Test name
16010 @param arg: Test argument (depends on test)
16012 """
16013 if prereq:
16014 errcls = errors.OpPrereqError
16015 else:
16016 errcls = errors.OpExecError
16018 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16019 test, arg),
16020 errcls)
16022 def CheckArguments(self):
16023 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16024 self.expandnames_calls = 0
16026 def ExpandNames(self):
16027 checkargs_calls = getattr(self, "checkargs_calls", 0)
16028 if checkargs_calls < 1:
16029 raise errors.ProgrammerError("CheckArguments was not called")
16031 self.expandnames_calls += 1
16033 if self.op.notify_waitlock:
16034 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16036 self.LogInfo("Expanding names")
16038 # Get lock on master node (just to get a lock, not for a particular reason)
16039 self.needed_locks = {
16040 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16041 }
16043 def Exec(self, feedback_fn):
16044 if self.expandnames_calls < 1:
16045 raise errors.ProgrammerError("ExpandNames was not called")
16047 if self.op.notify_exec:
16048 self._Notify(False, constants.JQT_EXEC, None)
16050 self.LogInfo("Executing")
16052 if self.op.log_messages:
16053 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16054 for idx, msg in enumerate(self.op.log_messages):
16055 self.LogInfo("Sending log message %s", idx + 1)
16056 feedback_fn(constants.JQT_MSGPREFIX + msg)
16057 # Report how many test messages have been sent
16058 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16061 raise errors.OpExecError("Opcode failure was requested")
16066 class LUTestAllocator(NoHooksLU):
16067 """Run allocator tests.
16069 This LU runs the allocator tests.
16071 """
16072 def CheckPrereq(self):
16073 """Check prerequisites.
16075 This checks the opcode parameters depending on the director and mode test.
16078 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16079 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16080 for attr in ["memory", "disks", "disk_template",
16081 "os", "tags", "nics", "vcpus"]:
16082 if not hasattr(self.op, attr):
16083 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16084 attr, errors.ECODE_INVAL)
16085 iname = self.cfg.ExpandInstanceName(self.op.name)
16086 if iname is not None:
16087 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16088 iname, errors.ECODE_EXISTS)
16089 if not isinstance(self.op.nics, list):
16090 raise errors.OpPrereqError("Invalid parameter 'nics'",
16091 errors.ECODE_INVAL)
16092 if not isinstance(self.op.disks, list):
16093 raise errors.OpPrereqError("Invalid parameter 'disks'",
16094 errors.ECODE_INVAL)
16095 for row in self.op.disks:
16096 if (not isinstance(row, dict) or
16097 constants.IDISK_SIZE not in row or
16098 not isinstance(row[constants.IDISK_SIZE], int) or
16099 constants.IDISK_MODE not in row or
16100 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16101 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16102 " parameter", errors.ECODE_INVAL)
16103 if self.op.hypervisor is None:
16104 self.op.hypervisor = self.cfg.GetHypervisorType()
16105 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16106 fname = _ExpandInstanceName(self.cfg, self.op.name)
16107 self.op.name = fname
16108 self.relocate_from = \
16109 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16110 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16111 constants.IALLOCATOR_MODE_NODE_EVAC):
16112 if not self.op.instances:
16113 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16114 self.op.instances = _GetWantedInstances(self, self.op.instances)
16116 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16117 self.op.mode, errors.ECODE_INVAL)
16119 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16120 if self.op.iallocator is None:
16121 raise errors.OpPrereqError("Missing allocator name",
16122 errors.ECODE_INVAL)
16123 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16124 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16125 self.op.direction, errors.ECODE_INVAL)
16127 def Exec(self, feedback_fn):
16128 """Run the allocator test.
16131 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16132 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16133 memory=self.op.memory,
16134 disks=self.op.disks,
16135 disk_template=self.op.disk_template,
16136 os=self.op.os,
16137 tags=self.op.tags,
16138 nics=self.op.nics,
16139 vcpus=self.op.vcpus,
16140 spindle_use=self.op.spindle_use,
16141 hypervisor=self.op.hypervisor)
16142 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16143 req = iallocator.IAReqRelocate(name=self.op.name,
16144 relocate_from=list(self.relocate_from))
16145 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16146 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16147 target_groups=self.op.target_groups)
16148 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16149 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16150 evac_mode=self.op.evac_mode)
16151 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16152 disk_template = self.op.disk_template
16153 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16154 memory=self.op.memory,
16155 disks=self.op.disks,
16156 disk_template=disk_template,
16157 os=self.op.os,
16158 tags=self.op.tags,
16159 nics=self.op.nics,
16160 vcpus=self.op.vcpus,
16161 spindle_use=self.op.spindle_use,
16162 hypervisor=self.op.hypervisor)
16163 for idx in range(self.op.count)]
16164 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16166 raise errors.ProgrammerError("Uncatched mode %s in"
16167 " LUTestAllocator.Exec", self.op.mode)
16169 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16170 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16171 result = ial.in_text
16172 else:
16173 ial.Run(self.op.iallocator, validate=False)
16174 result = ial.out_text
16176 return result
16178 class LUNetworkAdd(LogicalUnit):
16179 """Logical unit for creating networks.
16182 HPATH = "network-add"
16183 HTYPE = constants.HTYPE_NETWORK
16186 def BuildHooksNodes(self):
16187 """Build hooks nodes.
16190 mn = self.cfg.GetMasterNode()
16191 return ([mn], [mn])
16193 def CheckArguments(self):
16194 if self.op.mac_prefix:
16195 self.op.mac_prefix = \
16196 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16198 def ExpandNames(self):
16199 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
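# As in LUGroupAdd, the UUID is generated up front so the corresponding
# network lock can be acquired via add_locks; Exec later passes
# check_uuid=False to cfg.AddNetwork for the same reason.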
16201 if self.op.conflicts_check:
16202 self.share_locks[locking.LEVEL_NODE] = 1
16203 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16204 self.needed_locks = {
16205 locking.LEVEL_NODE: locking.ALL_SET,
16206 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16207 }
16208 else:
16209 self.needed_locks = {}
16211 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16213 def CheckPrereq(self):
16214 if self.op.network is None:
16215 raise errors.OpPrereqError("Network must be given",
16216 errors.ECODE_INVAL)
16218 uuid = self.cfg.LookupNetwork(self.op.network_name)
16221 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16222 self.op.network_name), errors.ECODE_EXISTS)
16224 # Check tag validity
16225 for tag in self.op.tags:
16226 objects.TaggableObject.ValidateTag(tag)
16228 def BuildHooksEnv(self):
16229 """Build hooks env.
16233 "name": self.op.network_name,
16234 "subnet": self.op.network,
16235 "gateway": self.op.gateway,
16236 "network6": self.op.network6,
16237 "gateway6": self.op.gateway6,
16238 "mac_prefix": self.op.mac_prefix,
16239 "network_type": self.op.network_type,
16240 "tags": self.op.tags,
16242 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16244 def Exec(self, feedback_fn):
16245 """Add the ip pool to the cluster.
16248 nobj = objects.Network(name=self.op.network_name,
16249 network=self.op.network,
16250 gateway=self.op.gateway,
16251 network6=self.op.network6,
16252 gateway6=self.op.gateway6,
16253 mac_prefix=self.op.mac_prefix,
16254 network_type=self.op.network_type,
16255 uuid=self.network_uuid,
16256 family=constants.IP4_VERSION)
16257 # Initialize the associated address pool
16258 try:
16259 pool = network.AddressPool.InitializeNetwork(nobj)
16260 except errors.AddressPoolError, e:
16261 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16263 # Check if we need to reserve the nodes and the cluster master IP
16264 # These may not be allocated to any instances in routed mode, as
16265 # they wouldn't function anyway.
16266 if self.op.conflicts_check:
16267 for node in self.cfg.GetAllNodesInfo().values():
16268 for ip in [node.primary_ip, node.secondary_ip]:
16269 try:
16270 if pool.Contains(ip):
16271 pool.Reserve(ip)
16272 self.LogInfo("Reserved IP address of node '%s' (%s)",
16273 node.name, ip)
16274 except errors.AddressPoolError:
16275 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16276 node.name, ip)
16278 master_ip = self.cfg.GetClusterInfo().master_ip
16279 try:
16280 if pool.Contains(master_ip):
16281 pool.Reserve(master_ip)
16282 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16283 except errors.AddressPoolError:
16284 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16287 if self.op.add_reserved_ips:
16288 for ip in self.op.add_reserved_ips:
16289 try:
16290 pool.Reserve(ip, external=True)
16291 except errors.AddressPoolError, e:
16292 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16294 if self.op.tags:
16295 for tag in self.op.tags:
16296 nobj.AddTag(tag)
16298 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16299 del self.remove_locks[locking.LEVEL_NETWORK]
16302 class LUNetworkRemove(LogicalUnit):
16303 HPATH = "network-remove"
16304 HTYPE = constants.HTYPE_NETWORK
16305 REQ_BGL = False
16307 def ExpandNames(self):
16308 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16310 if not self.network_uuid:
16311 raise errors.OpPrereqError(("Network '%s' not found" %
16312 self.op.network_name), errors.ECODE_NOENT)
16314 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16315 self.needed_locks = {
16316 locking.LEVEL_NETWORK: [self.network_uuid],
16317 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16318 }
16320 def CheckPrereq(self):
16321 """Check prerequisites.
16323 This checks that the given network name exists as a network and that it
16324 is not connected to any node group.
16326 """
16328 # Verify that the network is not connected.
16329 node_groups = [group.name
16330 for group in self.cfg.GetAllNodeGroupsInfo().values()
16331 if self.network_uuid in group.networks]
16334 self.LogWarning("Network '%s' is connected to the following"
16335 " node groups: %s" %
16336 (self.op.network_name,
16337 utils.CommaJoin(utils.NiceSort(node_groups))))
16338 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16340 def BuildHooksEnv(self):
16341 """Build hooks env.
16345 "NETWORK_NAME": self.op.network_name,
16348 def BuildHooksNodes(self):
16349 """Build hooks nodes.
16352 mn = self.cfg.GetMasterNode()
16353 return ([mn], [mn])
16355 def Exec(self, feedback_fn):
16356 """Remove the network.
16360 self.cfg.RemoveNetwork(self.network_uuid)
16361 except errors.ConfigurationError:
16362 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16363 (self.op.network_name, self.network_uuid))
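
# Sketch (field name assumed from the LU above; the authoritative definition
# lives in opcodes.py): removing a still-connected network fails in
# CheckPrereq with ECODE_STATE, so disconnect it from all node groups first.
#
#   op = opcodes.OpNetworkRemove(network_name="net1")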


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved IPs"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.network_type = self.network.network_type
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.network_type:
      if self.op.network_type == constants.VALUE_NONE:
        self.network_type = None
      else:
        self.network_type = self.op.network_type

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6
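
  # Each parameter above follows the same pattern: passing the magic value
  # constants.VALUE_NONE (the literal string "none" in current versions)
  # clears the field, any other value replaces it. For example (hypothetical
  # values):
  #
  #   opcodes.OpNetworkSetParams(network_name="net1", gateway="none")
  #
  # would clear the gateway, while gateway="192.0.2.254" would move it.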

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "network_type": self.network_type,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve the gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    if self.op.network_type:
      self.network.network_type = self.network_type

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
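
# Sketch of a typical modification (values hypothetical; note that
# CheckArguments above forbids combining a gateway change with reservation
# changes in a single opcode):
#
#   opcodes.OpNetworkSetParams(network_name="net1",
#                              add_reserved_ips=["192.0.2.20"],
#                              remove_reserved_ips=["192.0.2.10"])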


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
              break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
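
# The "map" statistic above is the textual bitmap produced by
# network.AddressPool.GetMap(): one character per address in the subnet,
# with "." for free and "X" for reserved in current versions, e.g.
# "XX........X....X" for a /28 with gateway and a few reservations.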


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
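
# This query pipeline is what backs network listings; a hypothetical
# client-side equivalent (field list assumed) would be:
#
#   opcodes.OpNetworkQuery(names=["net1"],
#                          output_fields=["name", "network", "gateway"],
#                          use_locking=False)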


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs to be verified once the
      # group lock has been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
      return

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
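
# Connection sketch (values hypothetical): mapping a network to a node group
# also fixes the NIC mode/link that instances in that group will use, e.g.
#
#   opcodes.OpNetworkConnect(network_name="net1", group_name="default",
#                            network_mode=constants.NIC_MODE_BRIDGED,
#                            network_link="br0", conflicts_check=True)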


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
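
# For example, an instance "inst1" whose first two NICs clash would be
# rendered by _FmtNetworkConflict as "nic0/192.0.2.5, nic1/192.0.2.6"
# (addresses hypothetical), and _NetworkConflictCheck would then abort the
# operation with ECODE_STATE after logging all such instances.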


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically; this needs to be verified once the
      # group lock has been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                          "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)
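
# Disconnect sketch (values hypothetical); CheckPrereq above refuses to
# disconnect while any instance NIC in the group still uses the network:
#
#   opcodes.OpNetworkDisconnect(network_name="net1", group_name="default")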


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
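
# E.g. _GetQueryImplementation(constants.QR_NETWORK) returns the
# _NetworkQuery class defined above; unknown resource names surface as an
# OpPrereqError with ECODE_INVAL rather than a bare KeyError.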


def _CheckForConflictingIp(lu, ip, node):
  """Raises an error in case of a conflicting IP address.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
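
# Note: on success this helper returns the (None, None) pair produced above;
# callers use it purely for its side effect of raising OpPrereqError when
# lu.cfg.CheckIPInNodeGroup reports that "ip" belongs to a network already
# connected to the node's group.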