# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

22 """Module implementing the master-side code."""
# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611

INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))


82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument (W0613) and
    # "method could be a function" (R0201) warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check whether we really have been called with the instance locks
    # held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLU")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @rtype: bool
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy

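
# Illustrative sketch of _GetUpdatedParams semantics (hypothetical values,
# not part of the original module):
#
#   old_params = {"mem": 128, "vcpus": 2}
#   update_dict = {"mem": constants.VALUE_DEFAULT, "disk": 10}
#   _GetUpdatedParams(old_params, update_dict)
#   => {"vcpus": 2, "disk": 10}
#
# "mem" is removed (reset to its default) because use_default is True, while
# "disk" is simply added; the input dicts are never modified in place.
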
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret

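
# Illustrative sketch (hypothetical keys and values): each sub-dict in
# C{updates} is merged over the matching sub-dict in C{base} via
# _GetUpdatedParams and then type-checked, so
#
#   base = {"a": {"x": 1}}
#   updates = {"a": {"x": 2}, "b": {"y": 3}}
#   _UpdateAndVerifySubDict(base, updates, type_check)
#   => {"a": {"x": 2}, "b": {"y": 3}}
#
# provided all values pass utils.ForceDictType against C{type_check}.
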
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"

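
# Illustrative sketch (hypothetical lock names): if an LU owns the node locks
# "node1", "node2" and "node3", then
#
#   _ReleaseLocks(lu, locking.LEVEL_NODE, keep=["node1"])   # frees node2, node3
#   _ReleaseLocks(lu, locking.LEVEL_NODE, names=["node2"])  # frees only node2
#   _ReleaseLocks(lu, locking.LEVEL_NODE)                   # frees all three
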
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)

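
# Illustrative sketch (hypothetical names): for an instance "inst1" whose LVs
# map to {"node1": ["xenvg/disk0"], "node2": ["xenvg/disk0"]}, the result is
#   {("node1", "xenvg/disk0"): "inst1", ("node2", "xenvg/disk0"): "inst1"}
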
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # allocate on)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None

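
# Illustrative sketch (hypothetical ipolicy): with a min of 256 and a max of
# 1024 for a parameter, a value of 512 yields None (in range), while 2048
# yields a message of the form "<name> value 2048 is not in range [256, 1024]";
# None and constants.VALUE_AUTO are always accepted without a range check.
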
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))

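
# Illustrative sketch: each (name, qualifier, value) triple above is checked
# with _ComputeMinMaxSpec; only the disk sizes carry a qualifier (their
# index), so a too-large second disk would hypothetically be reported with a
# "/1" suffix on the parameter name, and a fully compliant spec yields an
# empty list.
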
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if network_type:
    env["NETWORK_TYPE"] = network_type
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env

def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, network) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        if nobj.network:
          env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
        if nobj.gateway:
          env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
        if nobj.network6:
          env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
        if nobj.gateway6:
          env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
        if nobj.mac_prefix:
          env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
        if nobj.network_type:
          env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
        if nobj.tags:
          env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  ip = nic.ip
  mac = nic.mac
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net = nic.network
  netinfo = None
  if net:
    net_uuid = lu.cfg.LookupNetwork(net)
    if net_uuid:
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)

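
# Illustrative sketch (hypothetical values): for a bridged NIC without an
# associated network, the resulting tuple looks like
#   ("192.0.2.10", "aa:bb:cc:dd:ee:ff", constants.NIC_MODE_BRIDGED,
#    "xen-br0", None, None)
# i.e. (ip, mac, mode, link, net, netinfo), with netinfo filled only when the
# NIC's network can be resolved in the configuration.
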
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)

def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname

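
# Illustrative sketch (hypothetical names): a given name "inst1" resolving to
# "inst1.example.com" is accepted (and the resolution is logged), while a
# resolution to "other.example.com" raises OpPrereqError, since the given
# name must match a prefix component of the resolved hostname.
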
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


1942 def _VerifyCertificate(filename):
1943 """Verifies a certificate for L{LUClusterVerifyConfig}.
1945 @type filename: string
1946 @param filename: Path to PEM file
1950 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1951 utils.ReadFile(filename))
1952 except Exception, err: # pylint: disable=W0703
1953 return (LUClusterVerifyConfig.ETYPE_ERROR,
1954 "Failed to load X509 certificate %s: %s" % (filename, err))
1956 (errcode, msg) = \
1957 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1958 constants.SSL_CERT_EXPIRATION_ERROR)
1960 if msg:
1961 fnamemsg = "While verifying %s: %s" % (filename, msg)
1962 else:
1963 fnamemsg = None
1965 if errcode is None:
1966 return (None, fnamemsg)
1967 elif errcode == utils.CERT_WARNING:
1968 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1969 elif errcode == utils.CERT_ERROR:
1970 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1972 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
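# Summary of the possible return values (paths/messages illustrative only):
# a healthy certificate yields (None, None); one expiring within
# SSL_CERT_EXPIRATION_WARN yields (ETYPE_WARNING, "While verifying
# /path/to/cert.pem: ..."); an expired or unloadable one yields
# (ETYPE_ERROR, ...). Callers feed this tuple straight into _ErrorIf.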
1975 def _GetAllHypervisorParameters(cluster, instances):
1976 """Compute the set of all hypervisor parameters.
1978 @type cluster: L{objects.Cluster}
1979 @param cluster: the cluster object
1980 @type instances: list of L{objects.Instance}
1981 @param instances: additional instances from which to obtain parameters
1982 @rtype: list of (origin, hypervisor, parameters)
1983 @return: a list with all parameters found, indicating the hypervisor they
1984 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1988 hvp_data = []
1989 for hv_name in cluster.enabled_hypervisors:
1990 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1992 for os_name, os_hvp in cluster.os_hvp.items():
1993 for hv_name, hv_params in os_hvp.items():
1995 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1996 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1998 # TODO: collapse identical parameter values in a single one
1999 for instance in instances:
2000 if instance.hvparams:
2001 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2002 cluster.FillHV(instance)))
2004 return hvp_data
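# Shape of the returned list, with invented names and parameters; the three
# origins match the docstring above:
#
#   [("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-xenU"}),
#    ("os debian-edgy", "xen-pvm", {"kernel_path": "/boot/vmlinuz-os"}),
#    ("instance inst1.example.com", "kvm", {"acpi": True})]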
2007 class _VerifyErrors(object):
2008 """Mix-in for cluster/group verify LUs.
2010 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2011 self.op and self._feedback_fn to be available.)
2015 ETYPE_FIELD = "code"
2016 ETYPE_ERROR = "ERROR"
2017 ETYPE_WARNING = "WARNING"
2019 def _Error(self, ecode, item, msg, *args, **kwargs):
2020 """Format an error message.
2022 Based on the opcode's error_codes parameter, either format a
2023 parseable error code, or a simpler error string.
2025 This must be called only from Exec and functions called from Exec.
2028 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2029 itype, etxt, _ = ecode
2030 # first complete the msg
2031 if args:
2032 msg = msg % args
2033 # then format the whole message
2034 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2035 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2036 else:
2037 if item:
2038 item = " [%s]" % (item,)
2039 else:
2040 item = ""
2041 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2042 # and finally report it via the feedback_fn
2043 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
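# Example of the two formats built above, with made-up values. With the
# opcode's error_codes flag set the message is machine-parseable, otherwise
# it stays human-readable:
#
#   >>> "%s:%s:%s:%s:%s" % ("ERROR", "ENODESSH", "node", "node2", "down")
#   'ERROR:ENODESSH:node:node2:down'
#   >>> "%s: %s%s: %s" % ("ERROR", "node", " [node2]", "down")
#   'ERROR: node [node2]: down'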
2045 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2046 """Log an error message if the passed condition is True.
2048 """
2049 cond = (bool(cond)
2050 or self.op.debug_simulate_errors) # pylint: disable=E1101
2052 # If the error code is in the list of ignored errors, demote the error to
2053 # a warning
2054 (_, etxt, _) = ecode
2055 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2056 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2058 if cond:
2059 self._Error(ecode, *args, **kwargs)
2061 # do not mark the operation as failed for WARN cases only
2062 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2063 self.bad = self.bad or cond
2066 class LUClusterVerify(NoHooksLU):
2067 """Submits all jobs necessary to verify the cluster.
2072 def ExpandNames(self):
2073 self.needed_locks = {}
2075 def Exec(self, feedback_fn):
2076 jobs = []
2078 if self.op.group_name:
2079 groups = [self.op.group_name]
2080 depends_fn = lambda: None
2081 else:
2082 groups = self.cfg.GetNodeGroupList()
2084 # Verify global configuration
2085 jobs.append([
2086 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2087 ])
2089 # Always depend on global verification
2090 depends_fn = lambda: [(-len(jobs), [])]
2092 jobs.extend(
2093 [opcodes.OpClusterVerifyGroup(group_name=group,
2094 ignore_errors=self.op.ignore_errors,
2095 depends=depends_fn())]
2096 for group in groups)
2098 # Fix up all parameters
2099 for op in itertools.chain(*jobs): # pylint: disable=W0142
2100 op.debug_simulate_errors = self.op.debug_simulate_errors
2101 op.verbose = self.op.verbose
2102 op.error_codes = self.op.error_codes
2103 try:
2104 op.skip_checks = self.op.skip_checks
2105 except AttributeError:
2106 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2108 return ResultWithJobs(jobs)
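# Resulting job layout when no group name was given (sketch, group names
# invented). Each per-group job carries a relative dependency on the
# config-verification job submitted -len(jobs) positions earlier; the empty
# list is assumed here to mean the default set of accepted final statuses:
#
#   jobs = [[OpClusterVerifyConfig(...)],
#           [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])])],
#           [OpClusterVerifyGroup(group_name="rack2", depends=[(-2, [])])]]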
2111 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2112 """Verifies the cluster config.
2117 def _VerifyHVP(self, hvp_data):
2118 """Verifies locally the syntax of the hypervisor parameters.
2121 for item, hv_name, hv_params in hvp_data:
2122 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2123 (hv_name, item))
2124 try:
2125 hv_class = hypervisor.GetHypervisorClass(hv_name)
2126 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2127 hv_class.CheckParameterSyntax(hv_params)
2128 except errors.GenericError, err:
2129 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2131 def ExpandNames(self):
2132 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2133 self.share_locks = _ShareAll()
2135 def CheckPrereq(self):
2136 """Check prerequisites.
2139 # Retrieve all information
2140 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2141 self.all_node_info = self.cfg.GetAllNodesInfo()
2142 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2144 def Exec(self, feedback_fn):
2145 """Verify integrity of cluster, performing various test on nodes.
2149 self._feedback_fn = feedback_fn
2151 feedback_fn("* Verifying cluster config")
2153 for msg in self.cfg.VerifyConfig():
2154 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2156 feedback_fn("* Verifying cluster certificate files")
2158 for cert_filename in pathutils.ALL_CERT_FILES:
2159 (errcode, msg) = _VerifyCertificate(cert_filename)
2160 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2162 feedback_fn("* Verifying hypervisor parameters")
2164 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2165 self.all_inst_info.values()))
2167 feedback_fn("* Verifying all nodes belong to an existing group")
2169 # We do this verification here because, should this bogus circumstance
2170 # occur, it would never be caught by VerifyGroup, which only acts on
2171 # nodes/instances reachable from existing node groups.
2173 dangling_nodes = set(node.name for node in self.all_node_info.values()
2174 if node.group not in self.all_group_info)
2176 dangling_instances = {}
2177 no_node_instances = []
2179 for inst in self.all_inst_info.values():
2180 if inst.primary_node in dangling_nodes:
2181 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2182 elif inst.primary_node not in self.all_node_info:
2183 no_node_instances.append(inst.name)
2185 pretty_dangling = [
2186 "%s (%s)" %
2187 (node.name,
2188 utils.CommaJoin(dangling_instances.get(node.name,
2189 ["no instances"])))
2190 for node in dangling_nodes]
2192 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2194 "the following nodes (and their instances) belong to a non"
2195 " existing group: %s", utils.CommaJoin(pretty_dangling))
2197 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2199 "the following instances have a non-existing primary-node:"
2200 " %s", utils.CommaJoin(no_node_instances))
2205 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2206 """Verifies the status of a node group.
2209 HPATH = "cluster-verify"
2210 HTYPE = constants.HTYPE_CLUSTER
2213 _HOOKS_INDENT_RE = re.compile("^", re.M)
2215 class NodeImage(object):
2216 """A class representing the logical and physical status of a node.
2219 @ivar name: the node name to which this object refers
2220 @ivar volumes: a structure as returned from
2221 L{ganeti.backend.GetVolumeList} (runtime)
2222 @ivar instances: a list of running instances (runtime)
2223 @ivar pinst: list of configured primary instances (config)
2224 @ivar sinst: list of configured secondary instances (config)
2225 @ivar sbp: dictionary of {primary-node: list of instances} for all
2226 instances for which this node is secondary (config)
2227 @ivar mfree: free memory, as reported by hypervisor (runtime)
2228 @ivar dfree: free disk, as reported by the node (runtime)
2229 @ivar offline: the offline status (config)
2230 @type rpc_fail: boolean
2231 @ivar rpc_fail: whether the RPC verify call failed (overall,
2232 not whether the individual keys were correct) (runtime)
2233 @type lvm_fail: boolean
2234 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2235 @type hyp_fail: boolean
2236 @ivar hyp_fail: whether the RPC call didn't return the instance list
2237 @type ghost: boolean
2238 @ivar ghost: whether this is a known node or not (config)
2239 @type os_fail: boolean
2240 @ivar os_fail: whether the RPC call didn't return valid OS data
2242 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2243 @type vm_capable: boolean
2244 @ivar vm_capable: whether the node can host instances
2246 @ivar pv_min: size in MiB of the smallest PVs
2248 @ivar pv_max: size in MiB of the biggest PVs
2251 def __init__(self, offline=False, name=None, vm_capable=True):
2252 self.name = name
2253 self.volumes = {}
2254 self.instances = []
2255 self.pinst = []
2256 self.sinst = []
2257 self.sbp = {}
2258 self.mfree = 0
2259 self.dfree = 0
2260 self.offline = offline
2261 self.vm_capable = vm_capable
2262 self.rpc_fail = False
2263 self.lvm_fail = False
2264 self.hyp_fail = False
2265 self.ghost = False
2266 self.os_fail = False
2267 self.oslist = {}
2268 self.pv_min = None
2269 self.pv_max = None
2271 def ExpandNames(self):
2272 # This raises errors.OpPrereqError on its own:
2273 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2275 # Get instances in node group; this is unsafe and needs verification later
2276 inst_names = \
2277 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2279 self.needed_locks = {
2280 locking.LEVEL_INSTANCE: inst_names,
2281 locking.LEVEL_NODEGROUP: [self.group_uuid],
2282 locking.LEVEL_NODE: [],
2284 # This opcode is run by watcher every five minutes and acquires all nodes
2285 # for a group. It doesn't run for a long time, so it's better to acquire
2286 # the node allocation lock as well.
2287 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2288 }
2290 self.share_locks = _ShareAll()
2292 def DeclareLocks(self, level):
2293 if level == locking.LEVEL_NODE:
2294 # Get members of node group; this is unsafe and needs verification later
2295 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2297 all_inst_info = self.cfg.GetAllInstancesInfo()
2299 # In Exec(), we warn about mirrored instances that have primary and
2300 # secondary living in separate node groups. To fully verify that
2301 # volumes for these instances are healthy, we will need to do an
2302 # extra call to their secondaries. We ensure here those nodes will
2303 # be locked.
2304 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2305 # Important: access only the instances whose lock is owned
2306 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2307 nodes.update(all_inst_info[inst].secondary_nodes)
2309 self.needed_locks[locking.LEVEL_NODE] = nodes
2311 def CheckPrereq(self):
2312 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2313 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2315 group_nodes = set(self.group_info.members)
2316 group_instances = \
2317 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2319 unlocked_nodes = \
2320 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2322 unlocked_instances = \
2323 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2326 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2327 utils.CommaJoin(unlocked_nodes),
2330 if unlocked_instances:
2331 raise errors.OpPrereqError("Missing lock for instances: %s" %
2332 utils.CommaJoin(unlocked_instances),
2335 self.all_node_info = self.cfg.GetAllNodesInfo()
2336 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2338 self.my_node_names = utils.NiceSort(group_nodes)
2339 self.my_inst_names = utils.NiceSort(group_instances)
2341 self.my_node_info = dict((name, self.all_node_info[name])
2342 for name in self.my_node_names)
2344 self.my_inst_info = dict((name, self.all_inst_info[name])
2345 for name in self.my_inst_names)
2347 # We detect here the nodes that will need the extra RPC calls for verifying
2348 # split LV volumes; they should be locked.
2349 extra_lv_nodes = set()
2351 for inst in self.my_inst_info.values():
2352 if inst.disk_template in constants.DTS_INT_MIRROR:
2353 for nname in inst.all_nodes:
2354 if self.all_node_info[nname].group != self.group_uuid:
2355 extra_lv_nodes.add(nname)
2357 unlocked_lv_nodes = \
2358 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2360 if unlocked_lv_nodes:
2361 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2362 utils.CommaJoin(unlocked_lv_nodes),
2363 errors.ECODE_STATE)
2364 self.extra_lv_nodes = list(extra_lv_nodes)
2366 def _VerifyNode(self, ninfo, nresult):
2367 """Perform some basic validation on data returned from a node.
2369 - check the result data structure is well formed and has all the
2370 mandatory fields
2371 - check ganeti version
2373 @type ninfo: L{objects.Node}
2374 @param ninfo: the node to check
2375 @param nresult: the results from the node
2377 @return: whether overall this call was successful (and we can expect
2378 reasonable values in the response)
2381 node = ninfo.name
2382 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2384 # main result, nresult should be a non-empty dict
2385 test = not nresult or not isinstance(nresult, dict)
2386 _ErrorIf(test, constants.CV_ENODERPC, node,
2387 "unable to verify node: no data returned")
2391 # compares ganeti version
2392 local_version = constants.PROTOCOL_VERSION
2393 remote_version = nresult.get("version", None)
2394 test = not (remote_version and
2395 isinstance(remote_version, (list, tuple)) and
2396 len(remote_version) == 2)
2397 _ErrorIf(test, constants.CV_ENODERPC, node,
2398 "connection to node returned invalid data")
2402 test = local_version != remote_version[0]
2403 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2404 "incompatible protocol versions: master %s,"
2405 " node %s", local_version, remote_version[0])
2409 # node seems compatible, we can actually try to look into its results
2411 # full package version
2412 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2413 constants.CV_ENODEVERSION, node,
2414 "software version mismatch: master %s, node %s",
2415 constants.RELEASE_VERSION, remote_version[1],
2416 code=self.ETYPE_WARNING)
2418 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2419 if ninfo.vm_capable and isinstance(hyp_result, dict):
2420 for hv_name, hv_result in hyp_result.iteritems():
2421 test = hv_result is not None
2422 _ErrorIf(test, constants.CV_ENODEHV, node,
2423 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2425 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2426 if ninfo.vm_capable and isinstance(hvp_result, list):
2427 for item, hv_name, hv_result in hvp_result:
2428 _ErrorIf(True, constants.CV_ENODEHV, node,
2429 "hypervisor %s parameter verify failure (source %s): %s",
2430 hv_name, item, hv_result)
2432 test = nresult.get(constants.NV_NODESETUP,
2433 ["Missing NODESETUP results"])
2434 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2435 "; ".join(test))
2437 return True
2439 def _VerifyNodeTime(self, ninfo, nresult,
2440 nvinfo_starttime, nvinfo_endtime):
2441 """Check the node time.
2443 @type ninfo: L{objects.Node}
2444 @param ninfo: the node to check
2445 @param nresult: the remote results for the node
2446 @param nvinfo_starttime: the start time of the RPC call
2447 @param nvinfo_endtime: the end time of the RPC call
2450 node = ninfo.name
2451 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2453 ntime = nresult.get(constants.NV_TIME, None)
2454 try:
2455 ntime_merged = utils.MergeTime(ntime)
2456 except (ValueError, TypeError):
2457 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2458 return
2460 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2461 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2462 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2463 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2464 else:
2465 ntime_diff = None
2467 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2468 "Node time diverges by at least %s from master node time",
2471 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2472 """Check the node LVM results and update info for cross-node checks.
2474 @type ninfo: L{objects.Node}
2475 @param ninfo: the node to check
2476 @param nresult: the remote results for the node
2477 @param vg_name: the configured VG name
2478 @type nimg: L{NodeImage}
2479 @param nimg: node image
2485 node = ninfo.name
2486 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2488 # checks vg existence and size > 20G
2489 vglist = nresult.get(constants.NV_VGLIST, None)
2490 test = not vglist
2491 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2492 if not test:
2493 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2494 constants.MIN_VG_SIZE)
2495 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2498 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2499 for em in errmsgs:
2500 self._Error(constants.CV_ENODELVM, node, em)
2501 if pvminmax is not None:
2502 (nimg.pv_min, nimg.pv_max) = pvminmax
2504 def _VerifyGroupLVM(self, node_image, vg_name):
2505 """Check cross-node consistency in LVM.
2507 @type node_image: dict
2508 @param node_image: info about nodes, mapping from node to names to
2509 L{NodeImage} objects
2510 @param vg_name: the configured VG name
2516 # Only exclusive storage needs this kind of check
2517 if not self._exclusive_storage:
2518 return
2520 # exclusive_storage wants all PVs to have the same size (approximately),
2521 # if the smallest and the biggest ones are okay, everything is fine.
2522 # pv_min is None iff pv_max is None
2523 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2524 if not vals:
2525 return
2526 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2527 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2528 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2529 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2530 "PV sizes differ too much in the group; smallest (%s MB) is"
2531 " on %s, biggest (%s MB) is on %s",
2532 pvmin, minnode, pvmax, maxnode)
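# The (size, name) tuples above exploit element-wise tuple comparison:
# min()/max() order primarily by PV size and use the node name only as a
# tie-breaker. Invented values:
#
#   >>> vals = [(512, "node2"), (4096, "node3"), (512, "node1")]
#   >>> min(vals), max(vals)
#   ((512, 'node1'), (4096, 'node3'))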
2534 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2535 """Check the node bridges.
2537 @type ninfo: L{objects.Node}
2538 @param ninfo: the node to check
2539 @param nresult: the remote results for the node
2540 @param bridges: the expected list of bridges
2543 if not bridges:
2544 return
2546 node = ninfo.name
2547 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2549 missing = nresult.get(constants.NV_BRIDGES, None)
2550 test = not isinstance(missing, list)
2551 _ErrorIf(test, constants.CV_ENODENET, node,
2552 "did not return valid bridge information")
2553 if not test:
2554 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2555 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2557 def _VerifyNodeUserScripts(self, ninfo, nresult):
2558 """Check the results of user scripts presence and executability on the node
2560 @type ninfo: L{objects.Node}
2561 @param ninfo: the node to check
2562 @param nresult: the remote results for the node
2566 node = ninfo.name
2567 test = not constants.NV_USERSCRIPTS in nresult
2568 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2569 "did not return user scripts information")
2571 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2572 if broken_scripts:
2573 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2574 "user scripts not present or not executable: %s" %
2575 utils.CommaJoin(sorted(broken_scripts)))
2577 def _VerifyNodeNetwork(self, ninfo, nresult):
2578 """Check the node network connectivity results.
2580 @type ninfo: L{objects.Node}
2581 @param ninfo: the node to check
2582 @param nresult: the remote results for the node
2585 node = ninfo.name
2586 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2588 test = constants.NV_NODELIST not in nresult
2589 _ErrorIf(test, constants.CV_ENODESSH, node,
2590 "node hasn't returned node ssh connectivity data")
2591 if not test:
2592 if nresult[constants.NV_NODELIST]:
2593 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2594 _ErrorIf(True, constants.CV_ENODESSH, node,
2595 "ssh communication with node '%s': %s", a_node, a_msg)
2597 test = constants.NV_NODENETTEST not in nresult
2598 _ErrorIf(test, constants.CV_ENODENET, node,
2599 "node hasn't returned node tcp connectivity data")
2600 if not test:
2601 if nresult[constants.NV_NODENETTEST]:
2602 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2603 for anode in nlist:
2604 _ErrorIf(True, constants.CV_ENODENET, node,
2605 "tcp communication with node '%s': %s",
2606 anode, nresult[constants.NV_NODENETTEST][anode])
2608 test = constants.NV_MASTERIP not in nresult
2609 _ErrorIf(test, constants.CV_ENODENET, node,
2610 "node hasn't returned node master IP reachability data")
2611 if not test:
2612 if not nresult[constants.NV_MASTERIP]:
2613 if node == self.master_node:
2614 msg = "the master node cannot reach the master IP (not configured?)"
2616 msg = "cannot reach the master IP"
2617 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2619 def _VerifyInstance(self, instance, inst_config, node_image,
2620 diskstatus):
2621 """Verify an instance.
2623 This function checks to see if the required block devices are
2624 available on the instance's node, and that the nodes are in the correct
2625 state.
2628 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2629 pnode = inst_config.primary_node
2630 pnode_img = node_image[pnode]
2631 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2633 node_vol_should = {}
2634 inst_config.MapLVsByNode(node_vol_should)
2636 cluster = self.cfg.GetClusterInfo()
2637 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2638 self.group_info)
2639 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2640 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2641 code=self.ETYPE_WARNING)
2643 for node in node_vol_should:
2644 n_img = node_image[node]
2645 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2646 # ignore missing volumes on offline or broken nodes
2647 continue
2648 for volume in node_vol_should[node]:
2649 test = volume not in n_img.volumes
2650 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2651 "volume %s missing on node %s", volume, node)
2653 if inst_config.admin_state == constants.ADMINST_UP:
2654 test = instance not in pnode_img.instances and not pnode_img.offline
2655 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2656 "instance not running on its primary node %s",
2658 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2659 "instance is marked as running and lives on offline node %s",
2662 diskdata = [(nname, success, status, idx)
2663 for (nname, disks) in diskstatus.items()
2664 for idx, (success, status) in enumerate(disks)]
2666 for nname, success, bdev_status, idx in diskdata:
2667 # the 'ghost node' construction in Exec() ensures that we have a
2668 # node here
2669 snode = node_image[nname]
2670 bad_snode = snode.ghost or snode.offline
2671 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2672 not success and not bad_snode,
2673 constants.CV_EINSTANCEFAULTYDISK, instance,
2674 "couldn't retrieve status for disk/%s on %s: %s",
2675 idx, nname, bdev_status)
2676 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2677 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2678 constants.CV_EINSTANCEFAULTYDISK, instance,
2679 "disk/%s on %s is faulty", idx, nname)
2681 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2682 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2683 " primary node failed", instance)
2685 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2686 constants.CV_EINSTANCELAYOUT,
2687 instance, "instance has multiple secondary nodes: %s",
2688 utils.CommaJoin(inst_config.secondary_nodes),
2689 code=self.ETYPE_WARNING)
2691 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2692 # Disk template not compatible with exclusive_storage: no instance
2693 # node should have the flag set
2694 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2695 inst_config.all_nodes)
2696 es_nodes = [n for (n, es) in es_flags.items()
2697 if es]
2698 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2699 "instance has template %s, which is not supported on nodes"
2700 " that have exclusive storage set: %s",
2701 inst_config.disk_template, utils.CommaJoin(es_nodes))
2703 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2704 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2705 instance_groups = {}
2707 for node in instance_nodes:
2708 instance_groups.setdefault(self.all_node_info[node].group,
2709 []).append(node)
2711 pretty_list = [
2712 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2713 # Sort so that we always list the primary node first.
2714 for group, nodes in sorted(instance_groups.items(),
2715 key=lambda (_, nodes): pnode in nodes,
2716 reverse=True)]
2718 self._ErrorIf(len(instance_groups) > 1,
2719 constants.CV_EINSTANCESPLITGROUPS,
2720 instance, "instance has primary and secondary nodes in"
2721 " different groups: %s", utils.CommaJoin(pretty_list),
2722 code=self.ETYPE_WARNING)
2724 inst_nodes_offline = []
2725 for snode in inst_config.secondary_nodes:
2726 s_img = node_image[snode]
2727 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2728 snode, "instance %s, connection to secondary node failed",
2729 instance)
2731 if s_img.offline:
2732 inst_nodes_offline.append(snode)
2734 # warn that the instance lives on offline nodes
2735 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2736 "instance has offline secondary node(s) %s",
2737 utils.CommaJoin(inst_nodes_offline))
2738 # ... or ghost/non-vm_capable nodes
2739 for node in inst_config.all_nodes:
2740 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2741 instance, "instance lives on ghost node %s", node)
2742 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2743 instance, "instance lives on non-vm_capable node %s", node)
2745 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2746 """Verify if there are any unknown volumes in the cluster.
2748 The .os, .swap and backup volumes are ignored. All other volumes are
2749 reported as unknown.
2751 @type reserved: L{ganeti.utils.FieldSet}
2752 @param reserved: a FieldSet of reserved volume names
2755 for node, n_img in node_image.items():
2756 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2757 self.all_node_info[node].group != self.group_uuid):
2758 # skip non-healthy nodes
2759 continue
2760 for volume in n_img.volumes:
2761 test = ((node not in node_vol_should or
2762 volume not in node_vol_should[node]) and
2763 not reserved.Matches(volume))
2764 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2765 "volume %s is unknown", volume)
2767 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2768 """Verify N+1 Memory Resilience.
2770 Check that if one single node dies we can still start all the
2771 instances it was primary for.
2774 cluster_info = self.cfg.GetClusterInfo()
2775 for node, n_img in node_image.items():
2776 # This code checks that every node which is now listed as
2777 # secondary has enough memory to host all instances it is
2778 # supposed to should a single other node in the cluster fail.
2779 # FIXME: not ready for failover to an arbitrary node
2780 # FIXME: does not support file-backed instances
2781 # WARNING: we currently take into account down instances as well
2782 # as up ones, considering that even if they're down someone
2783 # might want to start them even in the event of a node failure.
2784 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2785 # we're skipping nodes marked offline and nodes in other groups from
2786 # the N+1 warning, since most likely we don't have good memory
2787 # information from them; we already list instances living on such
2788 # nodes, and that's enough warning
2789 continue
2790 #TODO(dynmem): also consider ballooning out other instances
2791 for prinode, instances in n_img.sbp.items():
2792 needed_mem = 0
2793 for instance in instances:
2794 bep = cluster_info.FillBE(instance_cfg[instance])
2795 if bep[constants.BE_AUTO_BALANCE]:
2796 needed_mem += bep[constants.BE_MINMEM]
2797 test = n_img.mfree < needed_mem
2798 self._ErrorIf(test, constants.CV_ENODEN1, node,
2799 "not enough memory to accomodate instance failovers"
2800 " should node %s fail (%dMiB needed, %dMiB available)",
2801 prinode, needed_mem, n_img.mfree)
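# Worked example (numbers invented): if this node is secondary for two
# auto-balanced instances whose primary is node1, each with BE_MINMEM of
# 1024 MiB, then needed_mem is 2048 for prinode node1; a reported
# mfree of 1500 would raise CV_ENODEN1 for a hypothetical node1 failover.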
2803 @classmethod
2804 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2805 (files_all, files_opt, files_mc, files_vm)):
2806 """Verifies file checksums collected from all nodes.
2808 @param errorif: Callback for reporting errors
2809 @param nodeinfo: List of L{objects.Node} objects
2810 @param master_node: Name of master node
2811 @param all_nvinfo: RPC results
2814 # Define functions determining which nodes to consider for a file
2815 files2nodefn = [
2816 (files_all, None),
2817 (files_mc, lambda node: (node.master_candidate or
2818 node.name == master_node)),
2819 (files_vm, lambda node: node.vm_capable),
2820 ]
2822 # Build mapping from filename to list of nodes which should have the file
2823 nodefiles = {}
2824 for (files, fn) in files2nodefn:
2825 if fn is None:
2826 filenodes = nodeinfo
2827 else:
2828 filenodes = filter(fn, nodeinfo)
2829 nodefiles.update((filename,
2830 frozenset(map(operator.attrgetter("name"), filenodes)))
2831 for filename in files)
2833 assert set(nodefiles) == (files_all | files_mc | files_vm)
2835 fileinfo = dict((filename, {}) for filename in nodefiles)
2836 ignore_nodes = set()
2838 for node in nodeinfo:
2839 if node.offline:
2840 ignore_nodes.add(node.name)
2841 continue
2843 nresult = all_nvinfo[node.name]
2845 if nresult.fail_msg or not nresult.payload:
2846 node_files = None
2847 else:
2848 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2849 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2850 for (key, value) in fingerprints.items())
2853 test = not (node_files and isinstance(node_files, dict))
2854 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2855 "Node did not return file checksum data")
2856 if test:
2857 ignore_nodes.add(node.name)
2858 continue
2860 # Build per-checksum mapping from filename to nodes having it
2861 for (filename, checksum) in node_files.items():
2862 assert filename in nodefiles
2863 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2865 for (filename, checksums) in fileinfo.items():
2866 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2868 # Nodes having the file
2869 with_file = frozenset(node_name
2870 for nodes in fileinfo[filename].values()
2871 for node_name in nodes) - ignore_nodes
2873 expected_nodes = nodefiles[filename] - ignore_nodes
2875 # Nodes missing file
2876 missing_file = expected_nodes - with_file
2878 if filename in files_opt:
2880 errorif(missing_file and missing_file != expected_nodes,
2881 constants.CV_ECLUSTERFILECHECK, None,
2882 "File %s is optional, but it must exist on all or no"
2883 " nodes (not found on %s)",
2884 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2885 else:
2886 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2887 "File %s is missing from node(s) %s", filename,
2888 utils.CommaJoin(utils.NiceSort(missing_file)))
2890 # Warn if a node has a file it shouldn't
2891 unexpected = with_file - expected_nodes
2892 errorif(unexpected,
2893 constants.CV_ECLUSTERFILECHECK, None,
2894 "File %s should not exist on node(s) %s",
2895 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2897 # See if there are multiple versions of the file
2898 test = len(checksums) > 1
2899 if test:
2900 variants = ["variant %s on %s" %
2901 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2902 for (idx, (checksum, nodes)) in
2903 enumerate(sorted(checksums.items()))]
2904 else:
2905 variants = []
2907 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2908 "File %s found with %s different checksums (%s)",
2909 filename, len(checksums), "; ".join(variants))
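# Sketch of the intermediate structures, with invented file names and
# shortened checksums; two checksum keys under one filename is exactly
# what the "different checksums" error above reports:
#
#   nodefiles = {"/var/lib/ganeti/config.data": frozenset(["node1", "node2"])}
#   fileinfo = {"/var/lib/ganeti/config.data":
#                 {"3f786850e38...": set(["node1"]),
#                  "89e6c98d92...": set(["node2"])}}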
2911 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2912 drbd_map):
2913 """Verifies the node DRBD status.
2915 @type ninfo: L{objects.Node}
2916 @param ninfo: the node to check
2917 @param nresult: the remote results for the node
2918 @param instanceinfo: the dict of instances
2919 @param drbd_helper: the configured DRBD usermode helper
2920 @param drbd_map: the DRBD map as returned by
2921 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2924 node = ninfo.name
2925 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2927 if drbd_helper:
2928 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2929 test = (helper_result is None)
2930 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2931 "no drbd usermode helper returned")
2932 if helper_result:
2933 status, payload = helper_result
2934 test = not status
2935 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2936 "drbd usermode helper check unsuccessful: %s", payload)
2937 test = status and (payload != drbd_helper)
2938 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2939 "wrong drbd usermode helper: %s", payload)
2941 # compute the DRBD minors
2942 node_drbd = {}
2943 for minor, instance in drbd_map[node].items():
2944 test = instance not in instanceinfo
2945 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2946 "ghost instance '%s' in temporary DRBD map", instance)
2947 # ghost instance should not be running, but otherwise we
2948 # don't give double warnings (both ghost instance and
2949 # unallocated minor in use)
2950 if test:
2951 node_drbd[minor] = (instance, False)
2952 else:
2953 instance = instanceinfo[instance]
2954 node_drbd[minor] = (instance.name,
2955 instance.admin_state == constants.ADMINST_UP)
2957 # and now check them
2958 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2959 test = not isinstance(used_minors, (tuple, list))
2960 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2961 "cannot parse drbd status file: %s", str(used_minors))
2962 if test:
2963 # we cannot check drbd status
2964 return
2966 for minor, (iname, must_exist) in node_drbd.items():
2967 test = minor not in used_minors and must_exist
2968 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2969 "drbd minor %d of instance %s is not active", minor, iname)
2970 for minor in used_minors:
2971 test = minor not in node_drbd
2972 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2973 "unallocated drbd minor %d is in use", minor)
2975 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2976 """Builds the node OS structures.
2978 @type ninfo: L{objects.Node}
2979 @param ninfo: the node to check
2980 @param nresult: the remote results for the node
2981 @param nimg: the node image object
2984 node = ninfo.name
2985 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2987 remote_os = nresult.get(constants.NV_OSLIST, None)
2988 test = (not isinstance(remote_os, list) or
2989 not compat.all(isinstance(v, list) and len(v) == 7
2990 for v in remote_os))
2992 _ErrorIf(test, constants.CV_ENODEOS, node,
2993 "node hasn't returned valid OS data")
3002 for (name, os_path, status, diagnose,
3003 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3005 if name not in os_dict:
3006 os_dict[name] = []
3008 # parameters is a list of lists instead of list of tuples due to
3009 # JSON lacking a real tuple type, fix it:
3010 parameters = [tuple(v) for v in parameters]
3011 os_dict[name].append((os_path, status, diagnose,
3012 set(variants), set(parameters), set(api_ver)))
3014 nimg.oslist = os_dict
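# Resulting structure (sketch, names invented): nimg.oslist maps each OS
# name to the occurrences found in the node's OS search path, e.g.
#
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set([("dhcp", "use DHCP")]),
#                     set([20]))]}
#
# A list longer than one means shadowed duplicate definitions, which
# _VerifyNodeOS reports.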
3016 def _VerifyNodeOS(self, ninfo, nimg, base):
3017 """Verifies the node OS list.
3019 @type ninfo: L{objects.Node}
3020 @param ninfo: the node to check
3021 @param nimg: the node image object
3022 @param base: the 'template' node we match against (e.g. from the master)
3025 node = ninfo.name
3026 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3028 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3030 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3031 for os_name, os_data in nimg.oslist.items():
3032 assert os_data, "Empty OS status for OS %s?!" % os_name
3033 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3034 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3035 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3036 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3037 "OS '%s' has multiple entries (first one shadows the rest): %s",
3038 os_name, utils.CommaJoin([v[0] for v in os_data]))
3039 # comparisons with the 'base' image
3040 test = os_name not in base.oslist
3041 _ErrorIf(test, constants.CV_ENODEOS, node,
3042 "Extra OS %s not present on reference node (%s)",
3046 assert base.oslist[os_name], "Base node has empty OS status?"
3047 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3048 if not b_status:
3049 # base OS is invalid, skipping
3050 continue
3051 for kind, a, b in [("API version", f_api, b_api),
3052 ("variants list", f_var, b_var),
3053 ("parameters", beautify_params(f_param),
3054 beautify_params(b_param))]:
3055 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3056 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3057 kind, os_name, base.name,
3058 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3060 # check any missing OSes
3061 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3062 _ErrorIf(missing, constants.CV_ENODEOS, node,
3063 "OSes present on reference node %s but missing on this node: %s",
3064 base.name, utils.CommaJoin(missing))
3066 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3067 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3069 @type ninfo: L{objects.Node}
3070 @param ninfo: the node to check
3071 @param nresult: the remote results for the node
3072 @type is_master: bool
3073 @param is_master: Whether node is the master node
3076 node = ninfo.name
3078 if (is_master and
3079 (constants.ENABLE_FILE_STORAGE or
3080 constants.ENABLE_SHARED_FILE_STORAGE)):
3081 try:
3082 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3083 except KeyError:
3084 # This should never happen
3085 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3086 "Node did not return forbidden file storage paths")
3087 else:
3088 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3089 "Found forbidden file storage paths: %s",
3090 utils.CommaJoin(fspaths))
3091 else:
3092 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3093 constants.CV_ENODEFILESTORAGEPATHS, node,
3094 "Node should not have returned forbidden file storage"
3097 def _VerifyOob(self, ninfo, nresult):
3098 """Verifies out of band functionality of a node.
3100 @type ninfo: L{objects.Node}
3101 @param ninfo: the node to check
3102 @param nresult: the remote results for the node
3105 node = ninfo.name
3106 # We just have to verify the paths on master and/or master candidates
3107 # as the oob helper is invoked on the master
3108 if ((ninfo.master_candidate or ninfo.master_capable) and
3109 constants.NV_OOB_PATHS in nresult):
3110 for path_result in nresult[constants.NV_OOB_PATHS]:
3111 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3113 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3114 """Verifies and updates the node volume data.
3116 This function will update a L{NodeImage}'s internal structures
3117 with data from the remote call.
3119 @type ninfo: L{objects.Node}
3120 @param ninfo: the node to check
3121 @param nresult: the remote results for the node
3122 @param nimg: the node image object
3123 @param vg_name: the configured VG name
3126 node = ninfo.name
3127 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3129 nimg.lvm_fail = True
3130 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3131 if vg_name is None:
3132 pass
3133 elif isinstance(lvdata, basestring):
3134 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3135 utils.SafeEncode(lvdata))
3136 elif not isinstance(lvdata, dict):
3137 _ErrorIf(True, constants.CV_ENODELVM, node,
3138 "rpc call to node failed (lvlist)")
3139 else:
3140 nimg.volumes = lvdata
3141 nimg.lvm_fail = False
3143 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3144 """Verifies and updates the node instance list.
3146 If the listing was successful, then updates this node's instance
3147 list. Otherwise, it marks the RPC call as failed for the instance
3148 list key.
3150 @type ninfo: L{objects.Node}
3151 @param ninfo: the node to check
3152 @param nresult: the remote results for the node
3153 @param nimg: the node image object
3156 idata = nresult.get(constants.NV_INSTANCELIST, None)
3157 test = not isinstance(idata, list)
3158 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3159 "rpc call to node failed (instancelist): %s",
3160 utils.SafeEncode(str(idata)))
3161 if test:
3162 nimg.hyp_fail = True
3163 else:
3164 nimg.instances = idata
3166 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3167 """Verifies and computes a node information map
3169 @type ninfo: L{objects.Node}
3170 @param ninfo: the node to check
3171 @param nresult: the remote results for the node
3172 @param nimg: the node image object
3173 @param vg_name: the configured VG name
3176 node = ninfo.name
3177 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3179 # try to read free memory (from the hypervisor)
3180 hv_info = nresult.get(constants.NV_HVINFO, None)
3181 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3182 _ErrorIf(test, constants.CV_ENODEHV, node,
3183 "rpc call to node failed (hvinfo)")
3184 if not test:
3185 try:
3186 nimg.mfree = int(hv_info["memory_free"])
3187 except (ValueError, TypeError):
3188 _ErrorIf(True, constants.CV_ENODERPC, node,
3189 "node returned invalid nodeinfo, check hypervisor")
3191 # FIXME: devise a free space model for file based instances as well
3192 if vg_name is not None:
3193 test = (constants.NV_VGLIST not in nresult or
3194 vg_name not in nresult[constants.NV_VGLIST])
3195 _ErrorIf(test, constants.CV_ENODELVM, node,
3196 "node didn't return data for the volume group '%s'"
3197 " - it is either missing or broken", vg_name)
3198 if not test:
3199 try:
3200 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3201 except (ValueError, TypeError):
3202 _ErrorIf(True, constants.CV_ENODERPC, node,
3203 "node returned invalid LVM info, check LVM status")
3205 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3206 """Gets per-disk status information for all instances.
3208 @type nodelist: list of strings
3209 @param nodelist: Node names
3210 @type node_image: dict of (name, L{objects.Node})
3211 @param node_image: Node objects
3212 @type instanceinfo: dict of (name, L{objects.Instance})
3213 @param instanceinfo: Instance objects
3214 @rtype: {instance: {node: [(success, payload)]}}
3215 @return: a dictionary of per-instance dictionaries with nodes as
3216 keys and disk information as values; the disk information is a
3217 list of tuples (success, payload)
3220 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3222 node_disks = {}
3223 node_disks_devonly = {}
3224 diskless_instances = set()
3225 diskless = constants.DT_DISKLESS
3227 for nname in nodelist:
3228 node_instances = list(itertools.chain(node_image[nname].pinst,
3229 node_image[nname].sinst))
3230 diskless_instances.update(inst for inst in node_instances
3231 if instanceinfo[inst].disk_template == diskless)
3232 disks = [(inst, disk)
3233 for inst in node_instances
3234 for disk in instanceinfo[inst].disks]
3236 if not disks:
3237 # No need to collect data
3238 continue
3240 node_disks[nname] = disks
3242 # _AnnotateDiskParams already makes copies of the disks
3243 devonly = []
3244 for (inst, dev) in disks:
3245 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3246 self.cfg.SetDiskID(anno_disk, nname)
3247 devonly.append(anno_disk)
3249 node_disks_devonly[nname] = devonly
3251 assert len(node_disks) == len(node_disks_devonly)
3253 # Collect data from all nodes with disks
3254 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3255 node_disks_devonly)
3257 assert len(result) == len(node_disks)
3259 instdisk = {}
3261 for (nname, nres) in result.items():
3262 disks = node_disks[nname]
3264 if nres.offline:
3265 # No data from this node
3266 data = len(disks) * [(False, "node offline")]
3267 else:
3268 msg = nres.fail_msg
3269 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3270 "while getting disk information: %s", msg)
3271 if msg:
3272 # No data from this node
3273 data = len(disks) * [(False, msg)]
3274 else:
3275 data = []
3276 for idx, i in enumerate(nres.payload):
3277 if isinstance(i, (tuple, list)) and len(i) == 2:
3278 data.append(i)
3279 else:
3280 logging.warning("Invalid result from node %s, entry %d: %s",
3281 nname, idx, i)
3282 data.append((False, "Invalid result from the remote node"))
3284 for ((inst, _), status) in zip(disks, data):
3285 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3287 # Add empty entries for diskless instances.
3288 for inst in diskless_instances:
3289 assert inst not in instdisk
3290 instdisk[inst] = {}
3292 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3293 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3294 compat.all(isinstance(s, (tuple, list)) and
3295 len(s) == 2 for s in statuses)
3296 for inst, nnames in instdisk.items()
3297 for nname, statuses in nnames.items())
3299 instdisk_keys = set(instdisk)
3300 instanceinfo_keys = set(instanceinfo)
3301 assert instdisk_keys == instanceinfo_keys, \
3302 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3303 (instdisk_keys, instanceinfo_keys))
3305 return instdisk
3307 @staticmethod
3308 def _SshNodeSelector(group_uuid, all_nodes):
3309 """Create endless iterators for all potential SSH check hosts.
3312 nodes = [node for node in all_nodes
3313 if (node.group != group_uuid and
3314 not node.offline)]
3315 keyfunc = operator.attrgetter("group")
3317 return map(itertools.cycle,
3318 [sorted(map(operator.attrgetter("name"), names))
3319 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3320 keyfunc)])
3322 @classmethod
3323 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3324 """Choose which nodes should talk to which other nodes.
3326 We will make nodes contact all nodes in their group, and one node from
3327 every other group.
3329 @warning: This algorithm has a known issue if one node group is much
3330 smaller than others (e.g. just one node). In such a case all other
3331 nodes will talk to the single node.
3334 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3335 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3337 return (online_nodes,
3338 dict((name, sorted([i.next() for i in sel]))
3339 for name in online_nodes))
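# Hedged example with invented groups: for group A = {node1, node2} and
# another group B = {node3, node4}, a possible result is
#
#   (["node1", "node2"],
#    {"node1": ["node3"], "node2": ["node4"]})
#
# i.e. every online node of A additionally contacts one round-robin member
# of every other group, while in-group connectivity is covered by the first
# element listing all online group members.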
3341 def BuildHooksEnv(self):
3344 Cluster-Verify hooks just ran in the post phase and their failure makes
3345 the output be logged in the verify output and the verification to fail.
3349 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3352 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3353 for node in self.my_node_info.values())
3355 return env
3357 def BuildHooksNodes(self):
3358 """Build hooks nodes.
3361 return ([], self.my_node_names)
3363 def Exec(self, feedback_fn):
3364 """Verify integrity of the node group, performing various test on nodes.
3367 # This method has too many local variables. pylint: disable=R0914
3368 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3370 if not self.my_node_names:
3372 feedback_fn("* Empty node group, skipping verification")
3375 self.bad = False
3376 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3377 verbose = self.op.verbose
3378 self._feedback_fn = feedback_fn
3380 vg_name = self.cfg.GetVGName()
3381 drbd_helper = self.cfg.GetDRBDHelper()
3382 cluster = self.cfg.GetClusterInfo()
3383 hypervisors = cluster.enabled_hypervisors
3384 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3386 i_non_redundant = [] # Non redundant instances
3387 i_non_a_balanced = [] # Non auto-balanced instances
3388 i_offline = 0 # Count of offline instances
3389 n_offline = 0 # Count of offline nodes
3390 n_drained = 0 # Count of nodes being drained
3391 node_vol_should = {}
3393 # FIXME: verify OS list
3396 filemap = _ComputeAncillaryFiles(cluster, False)
3398 # do local checksums
3399 master_node = self.master_node = self.cfg.GetMasterNode()
3400 master_ip = self.cfg.GetMasterIP()
3402 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3404 user_scripts = []
3405 if self.cfg.GetUseExternalMipScript():
3406 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3408 node_verify_param = {
3409 constants.NV_FILELIST:
3410 map(vcluster.MakeVirtualPath,
3411 utils.UniqueSequence(filename
3412 for files in filemap
3413 for filename in files)),
3414 constants.NV_NODELIST:
3415 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3416 self.all_node_info.values()),
3417 constants.NV_HYPERVISOR: hypervisors,
3418 constants.NV_HVPARAMS:
3419 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3420 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3421 for node in node_data_list
3422 if not node.offline],
3423 constants.NV_INSTANCELIST: hypervisors,
3424 constants.NV_VERSION: None,
3425 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3426 constants.NV_NODESETUP: None,
3427 constants.NV_TIME: None,
3428 constants.NV_MASTERIP: (master_node, master_ip),
3429 constants.NV_OSLIST: None,
3430 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3431 constants.NV_USERSCRIPTS: user_scripts,
3432 }
3434 if vg_name is not None:
3435 node_verify_param[constants.NV_VGLIST] = None
3436 node_verify_param[constants.NV_LVLIST] = vg_name
3437 node_verify_param[constants.NV_PVLIST] = [vg_name]
3439 if drbd_helper:
3440 node_verify_param[constants.NV_DRBDLIST] = None
3441 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3443 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3444 # Load file storage paths only from master node
3445 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3448 # FIXME: this needs to be changed per node-group, not cluster-wide
3449 bridges = set()
3450 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3451 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3452 bridges.add(default_nicpp[constants.NIC_LINK])
3453 for instance in self.my_inst_info.values():
3454 for nic in instance.nics:
3455 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3456 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3457 bridges.add(full_nic[constants.NIC_LINK])
3459 if bridges:
3460 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3462 # Build our expected cluster state
3463 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3464 name=node.name,
3465 vm_capable=node.vm_capable))
3466 for node in node_data_list)
3468 oob_paths = []
3470 for node in self.all_node_info.values():
3471 path = _SupportsOob(self.cfg, node)
3472 if path and path not in oob_paths:
3473 oob_paths.append(path)
3475 if oob_paths:
3476 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3478 for instance in self.my_inst_names:
3479 inst_config = self.my_inst_info[instance]
3480 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3481 i_offline += 1
3483 for nname in inst_config.all_nodes:
3484 if nname not in node_image:
3485 gnode = self.NodeImage(name=nname)
3486 gnode.ghost = (nname not in self.all_node_info)
3487 node_image[nname] = gnode
3489 inst_config.MapLVsByNode(node_vol_should)
3491 pnode = inst_config.primary_node
3492 node_image[pnode].pinst.append(instance)
3494 for snode in inst_config.secondary_nodes:
3495 nimg = node_image[snode]
3496 nimg.sinst.append(instance)
3497 if pnode not in nimg.sbp:
3498 nimg.sbp[pnode] = []
3499 nimg.sbp[pnode].append(instance)
3501 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3503 # The value of exclusive_storage should be the same across the group, so if
3504 # it's True for at least a node, we act as if it were set for all the nodes
3505 self._exclusive_storage = compat.any(es_flags.values())
3506 if self._exclusive_storage:
3507 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3508 es_unset_nodes = [n for (n, es) in es_flags.items()
3509 if not es]
3511 if es_unset_nodes:
3512 self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3513 "The exclusive_storage flag should be uniform in a group,"
3514 " but these nodes have it unset: %s",
3515 utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3516 self.LogWarning("Some checks required by exclusive storage will be"
3517 " performed also on nodes with the flag unset")
3519 # At this point, we have the in-memory data structures complete,
3520 # except for the runtime information, which we'll gather next
3522 # Due to the way our RPC system works, exact response times cannot be
3523 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3524 # time before and after executing the request, we can at least have a time
3525 # window.
3526 nvinfo_starttime = time.time()
3527 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3528 node_verify_param,
3529 self.cfg.GetClusterName())
3530 nvinfo_endtime = time.time()
3532 if self.extra_lv_nodes and vg_name is not None:
3533 extra_lv_nvinfo = \
3534 self.rpc.call_node_verify(self.extra_lv_nodes,
3535 {constants.NV_LVLIST: vg_name},
3536 self.cfg.GetClusterName())
3537 else:
3538 extra_lv_nvinfo = {}
3540 all_drbd_map = self.cfg.ComputeDRBDMap()
3542 feedback_fn("* Gathering disk information (%s nodes)" %
3543 len(self.my_node_names))
3544 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3545 self.my_inst_info)
3547 feedback_fn("* Verifying configuration file consistency")
3549 # If not all nodes are being checked, we need to make sure the master node
3550 # and a non-checked vm_capable node are in the list.
3551 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3552 if absent_nodes:
3553 vf_nvinfo = all_nvinfo.copy()
3554 vf_node_info = list(self.my_node_info.values())
3555 additional_nodes = []
3556 if master_node not in self.my_node_info:
3557 additional_nodes.append(master_node)
3558 vf_node_info.append(self.all_node_info[master_node])
3559 # Add the first vm_capable node we find which is not included,
3560 # excluding the master node (which we already have)
3561 for node in absent_nodes:
3562 nodeinfo = self.all_node_info[node]
3563 if (nodeinfo.vm_capable and not nodeinfo.offline and
3564 node != master_node):
3565 additional_nodes.append(node)
3566 vf_node_info.append(self.all_node_info[node])
3567 break
3568 key = constants.NV_FILELIST
3569 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3570 {key: node_verify_param[key]},
3571 self.cfg.GetClusterName()))
3572 else:
3573 vf_nvinfo = all_nvinfo
3574 vf_node_info = self.my_node_info.values()
3576 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3578 feedback_fn("* Verifying node status")
3580 refos_img = None
3582 for node_i in node_data_list:
3583 node = node_i.name
3584 nimg = node_image[node]
3586 if node_i.offline:
3587 if verbose:
3588 feedback_fn("* Skipping offline node %s" % (node,))
3589 n_offline += 1
3590 continue
3592 if node == master_node:
3593 ntype = "master"
3594 elif node_i.master_candidate:
3595 ntype = "master candidate"
3596 elif node_i.drained:
3597 ntype = "drained"
3598 n_drained += 1
3599 else:
3600 ntype = "regular"
3601 if verbose:
3602 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3604 msg = all_nvinfo[node].fail_msg
3605 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3606 msg)
3607 if msg:
3608 nimg.rpc_fail = True
3609 continue
3611 nresult = all_nvinfo[node].payload
3613 nimg.call_ok = self._VerifyNode(node_i, nresult)
3614 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3615 self._VerifyNodeNetwork(node_i, nresult)
3616 self._VerifyNodeUserScripts(node_i, nresult)
3617 self._VerifyOob(node_i, nresult)
3618 self._VerifyFileStoragePaths(node_i, nresult,
3619 node == master_node)
3621 if nimg.vm_capable:
3622 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3623 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3624 all_drbd_map)
3626 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3627 self._UpdateNodeInstances(node_i, nresult, nimg)
3628 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3629 self._UpdateNodeOS(node_i, nresult, nimg)
3631 if not nimg.os_fail:
3632 if refos_img is None:
3633 refos_img = nimg
3634 self._VerifyNodeOS(node_i, nimg, refos_img)
3635 self._VerifyNodeBridges(node_i, nresult, bridges)
3637 # Check whether all running instances are primary for the node. (This
3638 # can no longer be done from _VerifyInstance below, since some of the
3639 # wrong instances could be from other node groups.)
3640 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3642 for inst in non_primary_inst:
3643 test = inst in self.all_inst_info
3644 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3645 "instance should not run on node %s", node_i.name)
3646 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3647 "node is running unknown instance %s", inst)
3649 self._VerifyGroupLVM(node_image, vg_name)
3651 for node, result in extra_lv_nvinfo.items():
3652 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3653 node_image[node], vg_name)
3655 feedback_fn("* Verifying instance status")
3656 for instance in self.my_inst_names:
3658 feedback_fn("* Verifying instance %s" % instance)
3659 inst_config = self.my_inst_info[instance]
3660 self._VerifyInstance(instance, inst_config, node_image,
3661 instdisk[instance])
3663 # If the instance is non-redundant we cannot survive losing its primary
3664 # node, so we are not N+1 compliant.
3665 if inst_config.disk_template not in constants.DTS_MIRRORED:
3666 i_non_redundant.append(instance)
3668 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3669 i_non_a_balanced.append(instance)
3671 feedback_fn("* Verifying orphan volumes")
3672 reserved = utils.FieldSet(*cluster.reserved_lvs)
3674 # We will get spurious "unknown volume" warnings if any node of this group
3675 # is secondary for an instance whose primary is in another group. To avoid
3676 # them, we find these instances and add their volumes to node_vol_should.
3677 for inst in self.all_inst_info.values():
3678 for secondary in inst.secondary_nodes:
3679 if (secondary in self.my_node_info
3680 and inst.name not in self.my_inst_info):
3681 inst.MapLVsByNode(node_vol_should)
3682 break
3684 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3686 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3687 feedback_fn("* Verifying N+1 Memory redundancy")
3688 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3690 feedback_fn("* Other Notes")
3692 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3693 % len(i_non_redundant))
3695 if i_non_a_balanced:
3696 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3697 % len(i_non_a_balanced))
3700 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3703 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3706 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3710 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3711 """Analyze the post-hooks' result
3713 This method analyses the hook result, handles it, and sends some
3714 nicely-formatted feedback back to the user.
3716 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3717 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3718 @param hooks_results: the results of the multi-node hooks rpc call
3719 @param feedback_fn: function used send feedback back to the caller
3720 @param lu_result: previous Exec result
3721 @return: the new Exec result, based on the previous result
3725 # We only really run POST phase hooks, only for non-empty groups,
3726 # and are only interested in their results
3727 if not self.my_node_names:
3730 elif phase == constants.HOOKS_PHASE_POST:
3731 # Used to change hooks' output to proper indentation
3732 feedback_fn("* Hooks Results")
3733 assert hooks_results, "invalid result from hooks"
3735 for node_name in hooks_results:
3736 res = hooks_results[node_name]
3738 test = msg and not res.offline
3739 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3740 "Communication failure in hooks execution: %s", msg)
3741 if res.offline or msg:
3742 # No need to investigate payload if node is offline or gave
3745 for script, hkr, output in res.payload:
3746 test = hkr == constants.HKR_FAIL
3747 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3748 "Script %s failed, output:", script)
3750 output = self._HOOKS_INDENT_RE.sub(" ", output)
3751 feedback_fn("%s" % output)
3757 class LUClusterVerifyDisks(NoHooksLU):
3758 """Verifies the cluster disks status.
3763 def ExpandNames(self):
3764 self.share_locks = _ShareAll()
3765 self.needed_locks = {
3766 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3769 def Exec(self, feedback_fn):
3770 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3772 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3773 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3774 for group in group_names])
3777 class LUGroupVerifyDisks(NoHooksLU):
3778 """Verifies the status of all disks in a node group.
3783 def ExpandNames(self):
3784 # Raises errors.OpPrereqError on its own if group can't be found
3785 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3787 self.share_locks = _ShareAll()
3788 self.needed_locks = {
3789 locking.LEVEL_INSTANCE: [],
3790 locking.LEVEL_NODEGROUP: [],
3791 locking.LEVEL_NODE: [],
3793 # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
3794 # starts one instance of this opcode for every group, which means all
3795 # nodes will be locked for a short amount of time, so it's better to
3796 # acquire the node allocation lock as well.
3797 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3800 def DeclareLocks(self, level):
3801 if level == locking.LEVEL_INSTANCE:
3802 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3804 # Lock instances optimistically, needs verification once node and group
3805 # locks have been acquired
3806 self.needed_locks[locking.LEVEL_INSTANCE] = \
3807 self.cfg.GetNodeGroupInstances(self.group_uuid)
3809 elif level == locking.LEVEL_NODEGROUP:
3810 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3812 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3813 set([self.group_uuid] +
3814 # Lock all groups used by instances optimistically; this requires
3815 # going via the node before it's locked, requiring verification
3818 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3819 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3821 elif level == locking.LEVEL_NODE:
3822 # This will only lock the nodes in the group to be verified which contain
3824 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3825 self._LockInstancesNodes()
3827 # Lock all nodes in group to be verified
3828 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3829 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3830 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3832 def CheckPrereq(self):
3833 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3834 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3835 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3837 assert self.group_uuid in owned_groups
3839 # Check if locked instances are still correct
3840 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3842 # Get instance information
3843 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3845 # Check if node groups for locked instances are still correct
3846 _CheckInstancesNodeGroups(self.cfg, self.instances,
3847 owned_groups, owned_nodes, self.group_uuid)
3849 def Exec(self, feedback_fn):
3850 """Verify integrity of cluster disks.
3852 @rtype: tuple of three items
3853 @return: a tuple of (dict of node-to-node_error, list of instances
3854 which need activate-disks, dict of instance: (node, volume) for
3859 res_instances = set()
3862 nv_dict = _MapInstanceDisksToNodes(
3863 [inst for inst in self.instances.values()
3864 if inst.admin_state == constants.ADMINST_UP])
3867 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3868 set(self.cfg.GetVmCapableNodeList()))
3870 node_lvs = self.rpc.call_lv_list(nodes, [])
3872 for (node, node_res) in node_lvs.items():
3873 if node_res.offline:
3876 msg = node_res.fail_msg
3878 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3879 res_nodes[node] = msg
3882 for lv_name, (_, _, lv_online) in node_res.payload.items():
3883 inst = nv_dict.pop((node, lv_name), None)
3884 if not (lv_online or inst is None):
3885 res_instances.add(inst)
3887 # any leftover items in nv_dict are missing LVs, let's arrange the data
3889 for key, inst in nv_dict.iteritems():
3890 res_missing.setdefault(inst, []).append(list(key))
3892 return (res_nodes, list(res_instances), res_missing)
3895 class LUClusterRepairDiskSizes(NoHooksLU):
3896 """Verifies the cluster disks sizes.
3901 def ExpandNames(self):
3902 if self.op.instances:
3903 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3904 # Not getting the node allocation lock as only a specific set of
3905 # instances (and their nodes) is going to be acquired
3906 self.needed_locks = {
3907 locking.LEVEL_NODE_RES: [],
3908 locking.LEVEL_INSTANCE: self.wanted_names,
3910 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3912 self.wanted_names = None
3913 self.needed_locks = {
3914 locking.LEVEL_NODE_RES: locking.ALL_SET,
3915 locking.LEVEL_INSTANCE: locking.ALL_SET,
3917 # This opcode is acquires the node locks for all instances
3918 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3921 self.share_locks = {
3922 locking.LEVEL_NODE_RES: 1,
3923 locking.LEVEL_INSTANCE: 0,
3924 locking.LEVEL_NODE_ALLOC: 1,
3927 def DeclareLocks(self, level):
3928 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3929 self._LockInstancesNodes(primary_only=True, level=level)
3931 def CheckPrereq(self):
3932 """Check prerequisites.
3934 This only checks the optional instance list against the existing names.
3937 if self.wanted_names is None:
3938 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3940 self.wanted_instances = \
3941 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3943 def _EnsureChildSizes(self, disk):
3944 """Ensure children of the disk have the needed disk size.
3946 This is valid mainly for DRBD8 and fixes an issue where the
3947 children have smaller disk size.
3949 @param disk: an L{ganeti.objects.Disk} object
3952 if disk.dev_type == constants.LD_DRBD8:
3953 assert disk.children, "Empty children for DRBD8?"
3954 fchild = disk.children[0]
3955 mismatch = fchild.size < disk.size
3957 self.LogInfo("Child disk has size %d, parent %d, fixing",
3958 fchild.size, disk.size)
3959 fchild.size = disk.size
3961 # and we recurse on this child only, not on the metadev
3962 return self._EnsureChildSizes(fchild) or mismatch
3966 def Exec(self, feedback_fn):
3967 """Verify the size of cluster disks.
3970 # TODO: check child disks too
3971 # TODO: check differences in size between primary/secondary nodes
3973 for instance in self.wanted_instances:
3974 pnode = instance.primary_node
3975 if pnode not in per_node_disks:
3976 per_node_disks[pnode] = []
3977 for idx, disk in enumerate(instance.disks):
3978 per_node_disks[pnode].append((instance, idx, disk))
3980 assert not (frozenset(per_node_disks.keys()) -
3981 self.owned_locks(locking.LEVEL_NODE_RES)), \
3982 "Not owning correct locks"
3983 assert not self.owned_locks(locking.LEVEL_NODE)
3986 for node, dskl in per_node_disks.items():
3987 newl = [v[2].Copy() for v in dskl]
3989 self.cfg.SetDiskID(dsk, node)
3990 result = self.rpc.call_blockdev_getsize(node, newl)
3992 self.LogWarning("Failure in blockdev_getsize call to node"
3993 " %s, ignoring", node)
3995 if len(result.payload) != len(dskl):
3996 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3997 " result.payload=%s", node, len(dskl), result.payload)
3998 self.LogWarning("Invalid result from node %s, ignoring node results",
4001 for ((instance, idx, disk), size) in zip(dskl, result.payload):
4003 self.LogWarning("Disk %d of instance %s did not return size"
4004 " information, ignoring", idx, instance.name)
4006 if not isinstance(size, (int, long)):
4007 self.LogWarning("Disk %d of instance %s did not return valid"
4008 " size information, ignoring", idx, instance.name)
4011 if size != disk.size:
4012 self.LogInfo("Disk %d of instance %s has mismatched size,"
4013 " correcting: recorded %d, actual %d", idx,
4014 instance.name, disk.size, size)
4016 self.cfg.Update(instance, feedback_fn)
4017 changed.append((instance.name, idx, size))
4018 if self._EnsureChildSizes(disk):
4019 self.cfg.Update(instance, feedback_fn)
4020 changed.append((instance.name, idx, disk.size))
4024 class LUClusterRename(LogicalUnit):
4025 """Rename the cluster.
4028 HPATH = "cluster-rename"
4029 HTYPE = constants.HTYPE_CLUSTER
4031 def BuildHooksEnv(self):
4036 "OP_TARGET": self.cfg.GetClusterName(),
4037 "NEW_NAME": self.op.name,
4040 def BuildHooksNodes(self):
4041 """Build hooks nodes.
4044 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4046 def CheckPrereq(self):
4047 """Verify that the passed name is a valid one.
4050 hostname = netutils.GetHostname(name=self.op.name,
4051 family=self.cfg.GetPrimaryIPFamily())
4053 new_name = hostname.name
4054 self.ip = new_ip = hostname.ip
4055 old_name = self.cfg.GetClusterName()
4056 old_ip = self.cfg.GetMasterIP()
4057 if new_name == old_name and new_ip == old_ip:
4058 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4059 " cluster has changed",
4061 if new_ip != old_ip:
4062 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4063 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4064 " reachable on the network" %
4065 new_ip, errors.ECODE_NOTUNIQUE)
4067 self.op.name = new_name
4069 def Exec(self, feedback_fn):
4070 """Rename the cluster.
4073 clustername = self.op.name
4076 # shutdown the master IP
4077 master_params = self.cfg.GetMasterNetworkParameters()
4078 ems = self.cfg.GetUseExternalMipScript()
4079 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4081 result.Raise("Could not disable the master role")
4084 cluster = self.cfg.GetClusterInfo()
4085 cluster.cluster_name = clustername
4086 cluster.master_ip = new_ip
4087 self.cfg.Update(cluster, feedback_fn)
4089 # update the known hosts file
4090 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4091 node_list = self.cfg.GetOnlineNodeList()
4093 node_list.remove(master_params.name)
4096 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4098 master_params.ip = new_ip
4099 result = self.rpc.call_node_activate_master_ip(master_params.name,
4101 msg = result.fail_msg
4103 self.LogWarning("Could not re-enable the master role on"
4104 " the master, please restart manually: %s", msg)
4109 def _ValidateNetmask(cfg, netmask):
4110 """Checks if a netmask is valid.
4112 @type cfg: L{config.ConfigWriter}
4113 @param cfg: The cluster configuration
4115 @param netmask: the netmask to be verified
4116 @raise errors.OpPrereqError: if the validation fails
4119 ip_family = cfg.GetPrimaryIPFamily()
4121 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4122 except errors.ProgrammerError:
4123 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4124 ip_family, errors.ECODE_INVAL)
4125 if not ipcls.ValidateNetmask(netmask):
4126 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4127 (netmask), errors.ECODE_INVAL)
4130 class LUClusterSetParams(LogicalUnit):
4131 """Change the parameters of the cluster.
4134 HPATH = "cluster-modify"
4135 HTYPE = constants.HTYPE_CLUSTER
4138 def CheckArguments(self):
4142 if self.op.uid_pool:
4143 uidpool.CheckUidPool(self.op.uid_pool)
4145 if self.op.add_uids:
4146 uidpool.CheckUidPool(self.op.add_uids)
4148 if self.op.remove_uids:
4149 uidpool.CheckUidPool(self.op.remove_uids)
4151 if self.op.master_netmask is not None:
4152 _ValidateNetmask(self.cfg, self.op.master_netmask)
4154 if self.op.diskparams:
4155 for dt_params in self.op.diskparams.values():
4156 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4158 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4159 except errors.OpPrereqError, err:
4160 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4163 def ExpandNames(self):
4164 # FIXME: in the future maybe other cluster params won't require checking on
4165 # all nodes to be modified.
4166 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4167 # resource locks the right thing, shouldn't it be the BGL instead?
4168 self.needed_locks = {
4169 locking.LEVEL_NODE: locking.ALL_SET,
4170 locking.LEVEL_INSTANCE: locking.ALL_SET,
4171 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4172 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4174 self.share_locks = _ShareAll()
4176 def BuildHooksEnv(self):
4181 "OP_TARGET": self.cfg.GetClusterName(),
4182 "NEW_VG_NAME": self.op.vg_name,
4185 def BuildHooksNodes(self):
4186 """Build hooks nodes.
4189 mn = self.cfg.GetMasterNode()
4192 def CheckPrereq(self):
4193 """Check prerequisites.
4195 This checks whether the given params don't conflict and
4196 if the given volume group is valid.
4199 if self.op.vg_name is not None and not self.op.vg_name:
4200 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4201 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4202 " instances exist", errors.ECODE_INVAL)
4204 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4205 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4206 raise errors.OpPrereqError("Cannot disable drbd helper while"
4207 " drbd-based instances exist",
4210 node_list = self.owned_locks(locking.LEVEL_NODE)
4212 # if vg_name not None, checks given volume group on all nodes
4214 vglist = self.rpc.call_vg_list(node_list)
4215 for node in node_list:
4216 msg = vglist[node].fail_msg
4218 # ignoring down node
4219 self.LogWarning("Error while gathering data on node %s"
4220 " (ignoring node): %s", node, msg)
4222 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4224 constants.MIN_VG_SIZE)
4226 raise errors.OpPrereqError("Error on node '%s': %s" %
4227 (node, vgstatus), errors.ECODE_ENVIRON)
4229 if self.op.drbd_helper:
4230 # checks given drbd helper on all nodes
4231 helpers = self.rpc.call_drbd_helper(node_list)
4232 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4234 self.LogInfo("Not checking drbd helper on offline node %s", node)
4236 msg = helpers[node].fail_msg
4238 raise errors.OpPrereqError("Error checking drbd helper on node"
4239 " '%s': %s" % (node, msg),
4240 errors.ECODE_ENVIRON)
4241 node_helper = helpers[node].payload
4242 if node_helper != self.op.drbd_helper:
4243 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4244 (node, node_helper), errors.ECODE_ENVIRON)
4246 self.cluster = cluster = self.cfg.GetClusterInfo()
4247 # validate params changes
4248 if self.op.beparams:
4249 objects.UpgradeBeParams(self.op.beparams)
4250 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4251 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4253 if self.op.ndparams:
4254 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4255 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4257 # TODO: we need a more general way to handle resetting
4258 # cluster-level parameters to default values
4259 if self.new_ndparams["oob_program"] == "":
4260 self.new_ndparams["oob_program"] = \
4261 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4263 if self.op.hv_state:
4264 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4265 self.cluster.hv_state_static)
4266 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4267 for hv, values in new_hv_state.items())
4269 if self.op.disk_state:
4270 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4271 self.cluster.disk_state_static)
4272 self.new_disk_state = \
4273 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4274 for name, values in svalues.items()))
4275 for storage, svalues in new_disk_state.items())
4278 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4281 all_instances = self.cfg.GetAllInstancesInfo().values()
4283 for group in self.cfg.GetAllNodeGroupsInfo().values():
4284 instances = frozenset([inst for inst in all_instances
4285 if compat.any(node in group.members
4286 for node in inst.all_nodes)])
4287 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4288 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4289 new = _ComputeNewInstanceViolations(ipol,
4290 new_ipolicy, instances)
4292 violations.update(new)
4295 self.LogWarning("After the ipolicy change the following instances"
4296 " violate them: %s",
4297 utils.CommaJoin(utils.NiceSort(violations)))
4299 if self.op.nicparams:
4300 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4301 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4302 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4305 # check all instances for consistency
4306 for instance in self.cfg.GetAllInstancesInfo().values():
4307 for nic_idx, nic in enumerate(instance.nics):
4308 params_copy = copy.deepcopy(nic.nicparams)
4309 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4311 # check parameter syntax
4313 objects.NIC.CheckParameterSyntax(params_filled)
4314 except errors.ConfigurationError, err:
4315 nic_errors.append("Instance %s, nic/%d: %s" %
4316 (instance.name, nic_idx, err))
4318 # if we're moving instances to routed, check that they have an ip
4319 target_mode = params_filled[constants.NIC_MODE]
4320 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4321 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4322 " address" % (instance.name, nic_idx))
4324 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4325 "\n".join(nic_errors), errors.ECODE_INVAL)
4327 # hypervisor list/parameters
4328 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4329 if self.op.hvparams:
4330 for hv_name, hv_dict in self.op.hvparams.items():
4331 if hv_name not in self.new_hvparams:
4332 self.new_hvparams[hv_name] = hv_dict
4334 self.new_hvparams[hv_name].update(hv_dict)
4336 # disk template parameters
4337 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4338 if self.op.diskparams:
4339 for dt_name, dt_params in self.op.diskparams.items():
4340 if dt_name not in self.op.diskparams:
4341 self.new_diskparams[dt_name] = dt_params
4343 self.new_diskparams[dt_name].update(dt_params)
4345 # os hypervisor parameters
4346 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4348 for os_name, hvs in self.op.os_hvp.items():
4349 if os_name not in self.new_os_hvp:
4350 self.new_os_hvp[os_name] = hvs
4352 for hv_name, hv_dict in hvs.items():
4354 # Delete if it exists
4355 self.new_os_hvp[os_name].pop(hv_name, None)
4356 elif hv_name not in self.new_os_hvp[os_name]:
4357 self.new_os_hvp[os_name][hv_name] = hv_dict
4359 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4362 self.new_osp = objects.FillDict(cluster.osparams, {})
4363 if self.op.osparams:
4364 for os_name, osp in self.op.osparams.items():
4365 if os_name not in self.new_osp:
4366 self.new_osp[os_name] = {}
4368 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4371 if not self.new_osp[os_name]:
4372 # we removed all parameters
4373 del self.new_osp[os_name]
4375 # check the parameter validity (remote check)
4376 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4377 os_name, self.new_osp[os_name])
4379 # changes to the hypervisor list
4380 if self.op.enabled_hypervisors is not None:
4381 self.hv_list = self.op.enabled_hypervisors
4382 for hv in self.hv_list:
4383 # if the hypervisor doesn't already exist in the cluster
4384 # hvparams, we initialize it to empty, and then (in both
4385 # cases) we make sure to fill the defaults, as we might not
4386 # have a complete defaults list if the hypervisor wasn't
4388 if hv not in new_hvp:
4390 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4391 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4393 self.hv_list = cluster.enabled_hypervisors
4395 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4396 # either the enabled list has changed, or the parameters have, validate
4397 for hv_name, hv_params in self.new_hvparams.items():
4398 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4399 (self.op.enabled_hypervisors and
4400 hv_name in self.op.enabled_hypervisors)):
4401 # either this is a new hypervisor, or its parameters have changed
4402 hv_class = hypervisor.GetHypervisorClass(hv_name)
4403 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4404 hv_class.CheckParameterSyntax(hv_params)
4405 _CheckHVParams(self, node_list, hv_name, hv_params)
4408 # no need to check any newly-enabled hypervisors, since the
4409 # defaults have already been checked in the above code-block
4410 for os_name, os_hvp in self.new_os_hvp.items():
4411 for hv_name, hv_params in os_hvp.items():
4412 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4413 # we need to fill in the new os_hvp on top of the actual hv_p
4414 cluster_defaults = self.new_hvparams.get(hv_name, {})
4415 new_osp = objects.FillDict(cluster_defaults, hv_params)
4416 hv_class = hypervisor.GetHypervisorClass(hv_name)
4417 hv_class.CheckParameterSyntax(new_osp)
4418 _CheckHVParams(self, node_list, hv_name, new_osp)
4420 if self.op.default_iallocator:
4421 alloc_script = utils.FindFile(self.op.default_iallocator,
4422 constants.IALLOCATOR_SEARCH_PATH,
4424 if alloc_script is None:
4425 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4426 " specified" % self.op.default_iallocator,
4429 def Exec(self, feedback_fn):
4430 """Change the parameters of the cluster.
4433 if self.op.vg_name is not None:
4434 new_volume = self.op.vg_name
4437 if new_volume != self.cfg.GetVGName():
4438 self.cfg.SetVGName(new_volume)
4440 feedback_fn("Cluster LVM configuration already in desired"
4441 " state, not changing")
4442 if self.op.drbd_helper is not None:
4443 new_helper = self.op.drbd_helper
4446 if new_helper != self.cfg.GetDRBDHelper():
4447 self.cfg.SetDRBDHelper(new_helper)
4449 feedback_fn("Cluster DRBD helper already in desired state,"
4451 if self.op.hvparams:
4452 self.cluster.hvparams = self.new_hvparams
4454 self.cluster.os_hvp = self.new_os_hvp
4455 if self.op.enabled_hypervisors is not None:
4456 self.cluster.hvparams = self.new_hvparams
4457 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4458 if self.op.beparams:
4459 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4460 if self.op.nicparams:
4461 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4463 self.cluster.ipolicy = self.new_ipolicy
4464 if self.op.osparams:
4465 self.cluster.osparams = self.new_osp
4466 if self.op.ndparams:
4467 self.cluster.ndparams = self.new_ndparams
4468 if self.op.diskparams:
4469 self.cluster.diskparams = self.new_diskparams
4470 if self.op.hv_state:
4471 self.cluster.hv_state_static = self.new_hv_state
4472 if self.op.disk_state:
4473 self.cluster.disk_state_static = self.new_disk_state
4475 if self.op.candidate_pool_size is not None:
4476 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4477 # we need to update the pool size here, otherwise the save will fail
4478 _AdjustCandidatePool(self, [])
4480 if self.op.maintain_node_health is not None:
4481 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4482 feedback_fn("Note: CONFD was disabled at build time, node health"
4483 " maintenance is not useful (still enabling it)")
4484 self.cluster.maintain_node_health = self.op.maintain_node_health
4486 if self.op.prealloc_wipe_disks is not None:
4487 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4489 if self.op.add_uids is not None:
4490 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4492 if self.op.remove_uids is not None:
4493 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4495 if self.op.uid_pool is not None:
4496 self.cluster.uid_pool = self.op.uid_pool
4498 if self.op.default_iallocator is not None:
4499 self.cluster.default_iallocator = self.op.default_iallocator
4501 if self.op.reserved_lvs is not None:
4502 self.cluster.reserved_lvs = self.op.reserved_lvs
4504 if self.op.use_external_mip_script is not None:
4505 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4507 def helper_os(aname, mods, desc):
4509 lst = getattr(self.cluster, aname)
4510 for key, val in mods:
4511 if key == constants.DDM_ADD:
4513 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4516 elif key == constants.DDM_REMOVE:
4520 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4522 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4524 if self.op.hidden_os:
4525 helper_os("hidden_os", self.op.hidden_os, "hidden")
4527 if self.op.blacklisted_os:
4528 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4530 if self.op.master_netdev:
4531 master_params = self.cfg.GetMasterNetworkParameters()
4532 ems = self.cfg.GetUseExternalMipScript()
4533 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4534 self.cluster.master_netdev)
4535 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4537 result.Raise("Could not disable the master ip")
4538 feedback_fn("Changing master_netdev from %s to %s" %
4539 (master_params.netdev, self.op.master_netdev))
4540 self.cluster.master_netdev = self.op.master_netdev
4542 if self.op.master_netmask:
4543 master_params = self.cfg.GetMasterNetworkParameters()
4544 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4545 result = self.rpc.call_node_change_master_netmask(master_params.name,
4546 master_params.netmask,
4547 self.op.master_netmask,
4549 master_params.netdev)
4551 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4554 self.cluster.master_netmask = self.op.master_netmask
4556 self.cfg.Update(self.cluster, feedback_fn)
4558 if self.op.master_netdev:
4559 master_params = self.cfg.GetMasterNetworkParameters()
4560 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4561 self.op.master_netdev)
4562 ems = self.cfg.GetUseExternalMipScript()
4563 result = self.rpc.call_node_activate_master_ip(master_params.name,
4566 self.LogWarning("Could not re-enable the master ip on"
4567 " the master, please restart manually: %s",
4571 def _UploadHelper(lu, nodes, fname):
4572 """Helper for uploading a file and showing warnings.
4575 if os.path.exists(fname):
4576 result = lu.rpc.call_upload_file(nodes, fname)
4577 for to_node, to_result in result.items():
4578 msg = to_result.fail_msg
4580 msg = ("Copy of file %s to node %s failed: %s" %
4581 (fname, to_node, msg))
4585 def _ComputeAncillaryFiles(cluster, redist):
4586 """Compute files external to Ganeti which need to be consistent.
4588 @type redist: boolean
4589 @param redist: Whether to include files which need to be redistributed
4592 # Compute files for all nodes
4594 pathutils.SSH_KNOWN_HOSTS_FILE,
4595 pathutils.CONFD_HMAC_KEY,
4596 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4597 pathutils.SPICE_CERT_FILE,
4598 pathutils.SPICE_CACERT_FILE,
4599 pathutils.RAPI_USERS_FILE,
4603 # we need to ship at least the RAPI certificate
4604 files_all.add(pathutils.RAPI_CERT_FILE)
4606 files_all.update(pathutils.ALL_CERT_FILES)
4607 files_all.update(ssconf.SimpleStore().GetFileList())
4609 if cluster.modify_etc_hosts:
4610 files_all.add(pathutils.ETC_HOSTS)
4612 if cluster.use_external_mip_script:
4613 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4615 # Files which are optional, these must:
4616 # - be present in one other category as well
4617 # - either exist or not exist on all nodes of that category (mc, vm all)
4619 pathutils.RAPI_USERS_FILE,
4622 # Files which should only be on master candidates
4626 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4630 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4631 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4632 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4634 # Files which should only be on VM-capable nodes
4637 for hv_name in cluster.enabled_hypervisors
4639 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4643 for hv_name in cluster.enabled_hypervisors
4645 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4647 # Filenames in each category must be unique
4648 all_files_set = files_all | files_mc | files_vm
4649 assert (len(all_files_set) ==
4650 sum(map(len, [files_all, files_mc, files_vm]))), \
4651 "Found file listed in more than one file list"
4653 # Optional files must be present in one other category
4654 assert all_files_set.issuperset(files_opt), \
4655 "Optional file not in a different required list"
4657 # This one file should never ever be re-distributed via RPC
4658 assert not (redist and
4659 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4661 return (files_all, files_opt, files_mc, files_vm)
4664 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4665 """Distribute additional files which are part of the cluster configuration.
4667 ConfigWriter takes care of distributing the config and ssconf files, but
4668 there are more files which should be distributed to all nodes. This function
4669 makes sure those are copied.
4671 @param lu: calling logical unit
4672 @param additional_nodes: list of nodes not in the config to distribute to
4673 @type additional_vm: boolean
4674 @param additional_vm: whether the additional nodes are vm-capable or not
4677 # Gather target nodes
4678 cluster = lu.cfg.GetClusterInfo()
4679 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4681 online_nodes = lu.cfg.GetOnlineNodeList()
4682 online_set = frozenset(online_nodes)
4683 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4685 if additional_nodes is not None:
4686 online_nodes.extend(additional_nodes)
4688 vm_nodes.extend(additional_nodes)
4690 # Never distribute to master node
4691 for nodelist in [online_nodes, vm_nodes]:
4692 if master_info.name in nodelist:
4693 nodelist.remove(master_info.name)
4696 (files_all, _, files_mc, files_vm) = \
4697 _ComputeAncillaryFiles(cluster, True)
4699 # Never re-distribute configuration file from here
4700 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4701 pathutils.CLUSTER_CONF_FILE in files_vm)
4702 assert not files_mc, "Master candidates not handled in this function"
4705 (online_nodes, files_all),
4706 (vm_nodes, files_vm),
4710 for (node_list, files) in filemap:
4712 _UploadHelper(lu, node_list, fname)
4715 class LUClusterRedistConf(NoHooksLU):
4716 """Force the redistribution of cluster configuration.
4718 This is a very simple LU.
4723 def ExpandNames(self):
4724 self.needed_locks = {
4725 locking.LEVEL_NODE: locking.ALL_SET,
4726 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4728 self.share_locks = _ShareAll()
4730 def Exec(self, feedback_fn):
4731 """Redistribute the configuration.
4734 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4735 _RedistributeAncillaryFiles(self)
4738 class LUClusterActivateMasterIp(NoHooksLU):
4739 """Activate the master IP on the master node.
4742 def Exec(self, feedback_fn):
4743 """Activate the master IP.
4746 master_params = self.cfg.GetMasterNetworkParameters()
4747 ems = self.cfg.GetUseExternalMipScript()
4748 result = self.rpc.call_node_activate_master_ip(master_params.name,
4750 result.Raise("Could not activate the master IP")
4753 class LUClusterDeactivateMasterIp(NoHooksLU):
4754 """Deactivate the master IP on the master node.
4757 def Exec(self, feedback_fn):
4758 """Deactivate the master IP.
4761 master_params = self.cfg.GetMasterNetworkParameters()
4762 ems = self.cfg.GetUseExternalMipScript()
4763 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4765 result.Raise("Could not deactivate the master IP")
4768 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4769 """Sleep and poll for an instance's disk to sync.
4772 if not instance.disks or disks is not None and not disks:
4775 disks = _ExpandCheckDisks(instance, disks)
4778 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4780 node = instance.primary_node
4783 lu.cfg.SetDiskID(dev, node)
4785 # TODO: Convert to utils.Retry
4788 degr_retries = 10 # in seconds, as we sleep 1 second each time
4792 cumul_degraded = False
4793 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4794 msg = rstats.fail_msg
4796 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4799 raise errors.RemoteError("Can't contact node %s for mirror data,"
4800 " aborting." % node)
4803 rstats = rstats.payload
4805 for i, mstat in enumerate(rstats):
4807 lu.LogWarning("Can't compute data for node %s/%s",
4808 node, disks[i].iv_name)
4811 cumul_degraded = (cumul_degraded or
4812 (mstat.is_degraded and mstat.sync_percent is None))
4813 if mstat.sync_percent is not None:
4815 if mstat.estimated_time is not None:
4816 rem_time = ("%s remaining (estimated)" %
4817 utils.FormatSeconds(mstat.estimated_time))
4818 max_time = mstat.estimated_time
4820 rem_time = "no time estimate"
4821 lu.LogInfo("- device %s: %5.2f%% done, %s",
4822 disks[i].iv_name, mstat.sync_percent, rem_time)
4824 # if we're done but degraded, let's do a few small retries, to
4825 # make sure we see a stable and not transient situation; therefore
4826 # we force restart of the loop
4827 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4828 logging.info("Degraded disks found, %d retries left", degr_retries)
4836 time.sleep(min(60, max_time))
4839 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4841 return not cumul_degraded
4844 def _BlockdevFind(lu, node, dev, instance):
4845 """Wrapper around call_blockdev_find to annotate diskparams.
4847 @param lu: A reference to the lu object
4848 @param node: The node to call out
4849 @param dev: The device to find
4850 @param instance: The instance object the device belongs to
4851 @returns The result of the rpc call
4854 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4855 return lu.rpc.call_blockdev_find(node, disk)
4858 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4859 """Wrapper around L{_CheckDiskConsistencyInner}.
4862 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4863 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4867 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4869 """Check that mirrors are not degraded.
4871 @attention: The device has to be annotated already.
4873 The ldisk parameter, if True, will change the test from the
4874 is_degraded attribute (which represents overall non-ok status for
4875 the device(s)) to the ldisk (representing the local storage status).
4878 lu.cfg.SetDiskID(dev, node)
4882 if on_primary or dev.AssembleOnSecondary():
4883 rstats = lu.rpc.call_blockdev_find(node, dev)
4884 msg = rstats.fail_msg
4886 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4888 elif not rstats.payload:
4889 lu.LogWarning("Can't find disk on node %s", node)
4893 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4895 result = result and not rstats.payload.is_degraded
4898 for child in dev.children:
4899 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4905 class LUOobCommand(NoHooksLU):
4906 """Logical unit for OOB handling.
4910 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4912 def ExpandNames(self):
4913 """Gather locks we need.
4916 if self.op.node_names:
4917 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4918 lock_names = self.op.node_names
4920 lock_names = locking.ALL_SET
4922 self.needed_locks = {
4923 locking.LEVEL_NODE: lock_names,
4926 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4928 if not self.op.node_names:
4929 # Acquire node allocation lock only if all nodes are affected
4930 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4932 def CheckPrereq(self):
4933 """Check prerequisites.
4936 - the node exists in the configuration
4939 Any errors are signaled by raising errors.OpPrereqError.
4943 self.master_node = self.cfg.GetMasterNode()
4945 assert self.op.power_delay >= 0.0
4947 if self.op.node_names:
4948 if (self.op.command in self._SKIP_MASTER and
4949 self.master_node in self.op.node_names):
4950 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4951 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4953 if master_oob_handler:
4954 additional_text = ("run '%s %s %s' if you want to operate on the"
4955 " master regardless") % (master_oob_handler,
4959 additional_text = "it does not support out-of-band operations"
4961 raise errors.OpPrereqError(("Operating on the master node %s is not"
4962 " allowed for %s; %s") %
4963 (self.master_node, self.op.command,
4964 additional_text), errors.ECODE_INVAL)
4966 self.op.node_names = self.cfg.GetNodeList()
4967 if self.op.command in self._SKIP_MASTER:
4968 self.op.node_names.remove(self.master_node)
4970 if self.op.command in self._SKIP_MASTER:
4971 assert self.master_node not in self.op.node_names
4973 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4975 raise errors.OpPrereqError("Node %s not found" % node_name,
4978 self.nodes.append(node)
4980 if (not self.op.ignore_status and
4981 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4982 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4983 " not marked offline") % node_name,
4986 def Exec(self, feedback_fn):
4987 """Execute OOB and return result if we expect any.
4990 master_node = self.master_node
4993 for idx, node in enumerate(utils.NiceSort(self.nodes,
4994 key=lambda node: node.name)):
4995 node_entry = [(constants.RS_NORMAL, node.name)]
4996 ret.append(node_entry)
4998 oob_program = _SupportsOob(self.cfg, node)
5001 node_entry.append((constants.RS_UNAVAIL, None))
5004 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5005 self.op.command, oob_program, node.name)
5006 result = self.rpc.call_run_oob(master_node, oob_program,
5007 self.op.command, node.name,
5011 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5012 node.name, result.fail_msg)
5013 node_entry.append((constants.RS_NODATA, None))
5016 self._CheckPayload(result)
5017 except errors.OpExecError, err:
5018 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5020 node_entry.append((constants.RS_NODATA, None))
5022 if self.op.command == constants.OOB_HEALTH:
5023 # For health we should log important events
5024 for item, status in result.payload:
5025 if status in [constants.OOB_STATUS_WARNING,
5026 constants.OOB_STATUS_CRITICAL]:
5027 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5028 item, node.name, status)
5030 if self.op.command == constants.OOB_POWER_ON:
5032 elif self.op.command == constants.OOB_POWER_OFF:
5033 node.powered = False
5034 elif self.op.command == constants.OOB_POWER_STATUS:
5035 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5036 if powered != node.powered:
5037 logging.warning(("Recorded power state (%s) of node '%s' does not"
5038 " match actual power state (%s)"), node.powered,
5041 # For configuration changing commands we should update the node
5042 if self.op.command in (constants.OOB_POWER_ON,
5043 constants.OOB_POWER_OFF):
5044 self.cfg.Update(node, feedback_fn)
5046 node_entry.append((constants.RS_NORMAL, result.payload))
5048 if (self.op.command == constants.OOB_POWER_ON and
5049 idx < len(self.nodes) - 1):
5050 time.sleep(self.op.power_delay)
5054 def _CheckPayload(self, result):
5055 """Checks if the payload is valid.
5057 @param result: RPC result
5058 @raises errors.OpExecError: If payload is not valid
5062 if self.op.command == constants.OOB_HEALTH:
5063 if not isinstance(result.payload, list):
5064 errs.append("command 'health' is expected to return a list but got %s" %
5065 type(result.payload))
5067 for item, status in result.payload:
5068 if status not in constants.OOB_STATUSES:
5069 errs.append("health item '%s' has invalid status '%s'" %
5072 if self.op.command == constants.OOB_POWER_STATUS:
5073 if not isinstance(result.payload, dict):
5074 errs.append("power-status is expected to return a dict but got %s" %
5075 type(result.payload))
5077 if self.op.command in [
5078 constants.OOB_POWER_ON,
5079 constants.OOB_POWER_OFF,
5080 constants.OOB_POWER_CYCLE,
5082 if result.payload is not None:
5083 errs.append("%s is expected to not return payload but got '%s'" %
5084 (self.op.command, result.payload))
5087 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5088 utils.CommaJoin(errs))
5091 class _OsQuery(_QueryBase):
5092 FIELDS = query.OS_FIELDS
5094 def ExpandNames(self, lu):
5095 # Lock all nodes in shared mode
5096 # Temporary removal of locks, should be reverted later
5097 # TODO: reintroduce locks when they are lighter-weight
5098 lu.needed_locks = {}
5099 #self.share_locks[locking.LEVEL_NODE] = 1
5100 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5102 # The following variables interact with _QueryBase._GetNames
5104 self.wanted = self.names
5106 self.wanted = locking.ALL_SET
5108 self.do_locking = self.use_locking
5110 def DeclareLocks(self, lu, level):
5114 def _DiagnoseByOS(rlist):
5115 """Remaps a per-node return list into an a per-os per-node dictionary
5117 @param rlist: a map with node names as keys and OS objects as values
5120 @return: a dictionary with osnames as keys and as value another
5121 map, with nodes as keys and tuples of (path, status, diagnose,
5122 variants, parameters, api_versions) as values, eg::
5124 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5125 (/srv/..., False, "invalid api")],
5126 "node2": [(/srv/..., True, "", [], [])]}
5131 # we build here the list of nodes that didn't fail the RPC (at RPC
5132 # level), so that nodes with a non-responding node daemon don't
5133 # make all OSes invalid
5134 good_nodes = [node_name for node_name in rlist
5135 if not rlist[node_name].fail_msg]
5136 for node_name, nr in rlist.items():
5137 if nr.fail_msg or not nr.payload:
5139 for (name, path, status, diagnose, variants,
5140 params, api_versions) in nr.payload:
5141 if name not in all_os:
5142 # build a list of nodes for this os containing empty lists
5143 # for each node in node_list
5145 for nname in good_nodes:
5146 all_os[name][nname] = []
5147 # convert params from [name, help] to (name, help)
5148 params = [tuple(v) for v in params]
5149 all_os[name][node_name].append((path, status, diagnose,
5150 variants, params, api_versions))
5153 def _GetQueryData(self, lu):
5154 """Computes the list of nodes and their attributes.
5157 # Locking is not used
5158 assert not (compat.any(lu.glm.is_owned(level)
5159 for level in locking.LEVELS
5160 if level != locking.LEVEL_CLUSTER) or
5161 self.do_locking or self.use_locking)
5163 valid_nodes = [node.name
5164 for node in lu.cfg.GetAllNodesInfo().values()
5165 if not node.offline and node.vm_capable]
5166 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5167 cluster = lu.cfg.GetClusterInfo()
5171 for (os_name, os_data) in pol.items():
5172 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5173 hidden=(os_name in cluster.hidden_os),
5174 blacklisted=(os_name in cluster.blacklisted_os))
5178 api_versions = set()
5180 for idx, osl in enumerate(os_data.values()):
5181 info.valid = bool(info.valid and osl and osl[0][1])
5185 (node_variants, node_params, node_api) = osl[0][3:6]
5188 variants.update(node_variants)
5189 parameters.update(node_params)
5190 api_versions.update(node_api)
5192 # Filter out inconsistent values
5193 variants.intersection_update(node_variants)
5194 parameters.intersection_update(node_params)
5195 api_versions.intersection_update(node_api)
5197 info.variants = list(variants)
5198 info.parameters = list(parameters)
5199 info.api_versions = list(api_versions)
5201 data[os_name] = info
5203 # Prepare data in requested order
5204 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5208 class LUOsDiagnose(NoHooksLU):
5209 """Logical unit for OS diagnose/query.
5215 def _BuildFilter(fields, names):
5216 """Builds a filter for querying OSes.
5219 name_filter = qlang.MakeSimpleFilter("name", names)
5221 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5222 # respective field is not requested
5223 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5224 for fname in ["hidden", "blacklisted"]
5225 if fname not in fields]
5226 if "valid" not in fields:
5227 status_filter.append([qlang.OP_TRUE, "valid"])
5230 status_filter.insert(0, qlang.OP_AND)
5232 status_filter = None
5234 if name_filter and status_filter:
5235 return [qlang.OP_AND, name_filter, status_filter]
5239 return status_filter
5241 def CheckArguments(self):
5242 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5243 self.op.output_fields, False)
5245 def ExpandNames(self):
5246 self.oq.ExpandNames(self)
5248 def Exec(self, feedback_fn):
5249 return self.oq.OldStyleQuery(self)
5252 class _ExtStorageQuery(_QueryBase):
5253 FIELDS = query.EXTSTORAGE_FIELDS
5255 def ExpandNames(self, lu):
5256 # Lock all nodes in shared mode
5257 # Temporary removal of locks, should be reverted later
5258 # TODO: reintroduce locks when they are lighter-weight
5259 lu.needed_locks = {}
5260 #self.share_locks[locking.LEVEL_NODE] = 1
5261 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5263 # The following variables interact with _QueryBase._GetNames
5265 self.wanted = self.names
5267 self.wanted = locking.ALL_SET
5269 self.do_locking = self.use_locking
5271 def DeclareLocks(self, lu, level):
5275 def _DiagnoseByProvider(rlist):
5276 """Remaps a per-node return list into an a per-provider per-node dictionary
5278 @param rlist: a map with node names as keys and ExtStorage objects as values
5281 @return: a dictionary with extstorage providers as keys and as
5282 value another map, with nodes as keys and tuples of
5283 (path, status, diagnose, parameters) as values, eg::
5285 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5286 "node2": [(/srv/..., False, "missing file")]
5287 "node3": [(/srv/..., True, "", [])]
5292 # we build here the list of nodes that didn't fail the RPC (at RPC
5293 # level), so that nodes with a non-responding node daemon don't
5294 # make all OSes invalid
5295 good_nodes = [node_name for node_name in rlist
5296 if not rlist[node_name].fail_msg]
5297 for node_name, nr in rlist.items():
5298 if nr.fail_msg or not nr.payload:
5300 for (name, path, status, diagnose, params) in nr.payload:
5301 if name not in all_es:
5302 # build a list of nodes for this os containing empty lists
5303 # for each node in node_list
5305 for nname in good_nodes:
5306 all_es[name][nname] = []
5307 # convert params from [name, help] to (name, help)
5308 params = [tuple(v) for v in params]
5309 all_es[name][node_name].append((path, status, diagnose, params))
5312 def _GetQueryData(self, lu):
5313 """Computes the list of nodes and their attributes.
5316 # Locking is not used
5317 assert not (compat.any(lu.glm.is_owned(level)
5318 for level in locking.LEVELS
5319 if level != locking.LEVEL_CLUSTER) or
5320 self.do_locking or self.use_locking)
5322 valid_nodes = [node.name
5323 for node in lu.cfg.GetAllNodesInfo().values()
5324 if not node.offline and node.vm_capable]
5325 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5329 nodegroup_list = lu.cfg.GetNodeGroupList()
5331 for (es_name, es_data) in pol.items():
5332 # For every provider compute the nodegroup validity.
5333 # To do this we need to check the validity of each node in es_data
5334 # and then construct the corresponding nodegroup dict:
5335 # { nodegroup1: status
5336 # nodegroup2: status
5339 for nodegroup in nodegroup_list:
5340 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5342 nodegroup_nodes = ndgrp.members
5343 nodegroup_name = ndgrp.name
5346 for node in nodegroup_nodes:
5347 if node in valid_nodes:
5348 if es_data[node] != []:
5349 node_status = es_data[node][0][1]
5350 node_statuses.append(node_status)
5352 node_statuses.append(False)
5354 if False in node_statuses:
5355 ndgrp_data[nodegroup_name] = False
5357 ndgrp_data[nodegroup_name] = True
5359 # Compute the provider's parameters
5361 for idx, esl in enumerate(es_data.values()):
5362 valid = bool(esl and esl[0][1])
5366 node_params = esl[0][3]
5369 parameters.update(node_params)
5371 # Filter out inconsistent values
5372 parameters.intersection_update(node_params)
5374 params = list(parameters)
5376 # Now fill all the info for this provider
5377 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5378 nodegroup_status=ndgrp_data,
5381 data[es_name] = info
5383 # Prepare data in requested order
5384 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5388 class LUExtStorageDiagnose(NoHooksLU):
5389 """Logical unit for ExtStorage diagnose/query.
5394 def CheckArguments(self):
5395 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5396 self.op.output_fields, False)
5398 def ExpandNames(self):
5399 self.eq.ExpandNames(self)
5401 def Exec(self, feedback_fn):
5402 return self.eq.OldStyleQuery(self)
5405 class LUNodeRemove(LogicalUnit):
5406 """Logical unit for removing a node.
5409 HPATH = "node-remove"
5410 HTYPE = constants.HTYPE_NODE
5412 def BuildHooksEnv(self):
5417 "OP_TARGET": self.op.node_name,
5418 "NODE_NAME": self.op.node_name,
5421 def BuildHooksNodes(self):
5422 """Build hooks nodes.
5424 This doesn't run on the target node in the pre phase as a failed
5425 node would then be impossible to remove.
5428 all_nodes = self.cfg.GetNodeList()
5430 all_nodes.remove(self.op.node_name)
5433 return (all_nodes, all_nodes)
5435 def CheckPrereq(self):
5436 """Check prerequisites.
5439 - the node exists in the configuration
5440 - it does not have primary or secondary instances
5441 - it's not the master
5443 Any errors are signaled by raising errors.OpPrereqError.
5446 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5447 node = self.cfg.GetNodeInfo(self.op.node_name)
5448 assert node is not None
5450 masternode = self.cfg.GetMasterNode()
5451 if node.name == masternode:
5452 raise errors.OpPrereqError("Node is the master node, failover to another"
5453 " node is required", errors.ECODE_INVAL)
5455 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5456 if node.name in instance.all_nodes:
5457 raise errors.OpPrereqError("Instance %s is still running on the node,"
5458 " please remove first" % instance_name,
5460 self.op.node_name = node.name
5463 def Exec(self, feedback_fn):
5464 """Removes the node from the cluster.
5468 logging.info("Stopping the node daemon and removing configs from node %s",
5471 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5473 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5476 # Promote nodes to master candidate as needed
5477 _AdjustCandidatePool(self, exceptions=[node.name])
5478 self.context.RemoveNode(node.name)
5480 # Run post hooks on the node before it's removed
5481 _RunPostHook(self, node.name)
5483 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5484 msg = result.fail_msg
5486 self.LogWarning("Errors encountered on the remote node while leaving"
5487 " the cluster: %s", msg)
5489 # Remove node from our /etc/hosts
5490 if self.cfg.GetClusterInfo().modify_etc_hosts:
5491 master_node = self.cfg.GetMasterNode()
5492 result = self.rpc.call_etc_hosts_modify(master_node,
5493 constants.ETC_HOSTS_REMOVE,
5495 result.Raise("Can't update hosts file with new host data")
5496 _RedistributeAncillaryFiles(self)
5499 class _NodeQuery(_QueryBase):
5500 FIELDS = query.NODE_FIELDS
5502 def ExpandNames(self, lu):
5503 lu.needed_locks = {}
5504 lu.share_locks = _ShareAll()
5507 self.wanted = _GetWantedNodes(lu, self.names)
5509 self.wanted = locking.ALL_SET
5511 self.do_locking = (self.use_locking and
5512 query.NQ_LIVE in self.requested_data)
5515 # If any non-static field is requested we need to lock the nodes
5516 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5517 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5519 def DeclareLocks(self, lu, level):
5522 def _GetQueryData(self, lu):
5523 """Computes the list of nodes and their attributes.
5526 all_info = lu.cfg.GetAllNodesInfo()
5528 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5530 # Gather data as requested
5531 if query.NQ_LIVE in self.requested_data:
5532 # filter out non-vm_capable nodes
5533 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5535 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5536 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5537 [lu.cfg.GetHypervisorType()], es_flags)
5538 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5539 for (name, nresult) in node_data.items()
5540 if not nresult.fail_msg and nresult.payload)
5544 if query.NQ_INST in self.requested_data:
5545 node_to_primary = dict([(name, set()) for name in nodenames])
5546 node_to_secondary = dict([(name, set()) for name in nodenames])
5548 inst_data = lu.cfg.GetAllInstancesInfo()
5550 for inst in inst_data.values():
5551 if inst.primary_node in node_to_primary:
5552 node_to_primary[inst.primary_node].add(inst.name)
5553 for secnode in inst.secondary_nodes:
5554 if secnode in node_to_secondary:
5555 node_to_secondary[secnode].add(inst.name)
5557 node_to_primary = None
5558 node_to_secondary = None
5560 if query.NQ_OOB in self.requested_data:
5561 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5562 for name, node in all_info.iteritems())
5566 if query.NQ_GROUP in self.requested_data:
5567 groups = lu.cfg.GetAllNodeGroupsInfo()
5571 return query.NodeQueryData([all_info[name] for name in nodenames],
5572 live_data, lu.cfg.GetMasterNode(),
5573 node_to_primary, node_to_secondary, groups,
5574 oob_support, lu.cfg.GetClusterInfo())
5577 class LUNodeQuery(NoHooksLU):
5578 """Logical unit for querying nodes.
5581 # pylint: disable=W0142
5584 def CheckArguments(self):
5585 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5586 self.op.output_fields, self.op.use_locking)
5588 def ExpandNames(self):
5589 self.nq.ExpandNames(self)
5591 def DeclareLocks(self, level):
5592 self.nq.DeclareLocks(self, level)
5594 def Exec(self, feedback_fn):
5595 return self.nq.OldStyleQuery(self)
5598 class LUNodeQueryvols(NoHooksLU):
5599 """Logical unit for getting volumes on node(s).
5603 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5604 _FIELDS_STATIC = utils.FieldSet("node")
5606 def CheckArguments(self):
5607 _CheckOutputFields(static=self._FIELDS_STATIC,
5608 dynamic=self._FIELDS_DYNAMIC,
5609 selected=self.op.output_fields)
5611 def ExpandNames(self):
5612 self.share_locks = _ShareAll()
5615 self.needed_locks = {
5616 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5619 self.needed_locks = {
5620 locking.LEVEL_NODE: locking.ALL_SET,
5621 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5622 }
5624 def Exec(self, feedback_fn):
5625 """Computes the list of nodes and their attributes.
5628 nodenames = self.owned_locks(locking.LEVEL_NODE)
5629 volumes = self.rpc.call_node_volumes(nodenames)
5631 ilist = self.cfg.GetAllInstancesInfo()
5632 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5634 output = []
5635 for node in nodenames:
5636 nresult = volumes[node]
5637 if nresult.offline:
5638 continue
5639 msg = nresult.fail_msg
5640 if msg:
5641 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5642 continue
5644 node_vols = sorted(nresult.payload,
5645 key=operator.itemgetter("dev"))
5647 for vol in node_vols:
5648 node_output = []
5649 for field in self.op.output_fields:
5650 if field == "node":
5651 val = node
5652 elif field == "phys":
5653 val = vol["dev"]
5654 elif field == "vg":
5655 val = vol["vg"]
5656 elif field == "name":
5657 val = vol["name"]
5658 elif field == "size":
5659 val = int(float(vol["size"]))
5660 elif field == "instance":
5661 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5662 else:
5663 raise errors.ParameterError(field)
5664 node_output.append(str(val))
5666 output.append(node_output)
5668 return output
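# Illustrative note: each row of "output" follows self.op.output_fields in
# order, with all values stringified above; e.g. for output_fields
# ["node", "name", "size"] a row could look like (values hypothetical):
#
#   ["node1.example.com", "disk0", "10240"]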
5671 class LUNodeQueryStorage(NoHooksLU):
5672 """Logical unit for getting information on storage units on node(s).
5675 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5676 REQ_BGL = False
5678 def CheckArguments(self):
5679 _CheckOutputFields(static=self._FIELDS_STATIC,
5680 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5681 selected=self.op.output_fields)
5683 def ExpandNames(self):
5684 self.share_locks = _ShareAll()
5686 if self.op.nodes:
5687 self.needed_locks = {
5688 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5689 }
5690 else:
5691 self.needed_locks = {
5692 locking.LEVEL_NODE: locking.ALL_SET,
5693 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5694 }
5696 def Exec(self, feedback_fn):
5697 """Computes the list of nodes and their attributes.
5700 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5702 # Always get name to sort by
5703 if constants.SF_NAME in self.op.output_fields:
5704 fields = self.op.output_fields[:]
5705 else:
5706 fields = [constants.SF_NAME] + self.op.output_fields
5708 # Never ask for node or type as it's only known to the LU
5709 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5710 while extra in fields:
5711 fields.remove(extra)
5713 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5714 name_idx = field_idx[constants.SF_NAME]
5716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5717 data = self.rpc.call_storage_list(self.nodes,
5718 self.op.storage_type, st_args,
5719 self.op.name, fields)
5721 result = []
5723 for node in utils.NiceSort(self.nodes):
5724 nresult = data[node]
5725 if nresult.offline:
5726 continue
5728 msg = nresult.fail_msg
5729 if msg:
5730 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5731 continue
5733 rows = dict([(row[name_idx], row) for row in nresult.payload])
5735 for name in utils.NiceSort(rows.keys()):
5736 row = rows[name]
5738 out = []
5740 for field in self.op.output_fields:
5741 if field == constants.SF_NODE:
5742 val = node
5743 elif field == constants.SF_TYPE:
5744 val = self.op.storage_type
5745 elif field in field_idx:
5746 val = row[field_idx[field]]
5747 else:
5748 raise errors.ParameterError(field)
5750 out.append(str(val))
5752 result.append(out)
5754 return result
5757 class _InstanceQuery(_QueryBase):
5758 FIELDS = query.INSTANCE_FIELDS
5760 def ExpandNames(self, lu):
5761 lu.needed_locks = {}
5762 lu.share_locks = _ShareAll()
5764 if self.names:
5765 self.wanted = _GetWantedInstances(lu, self.names)
5766 else:
5767 self.wanted = locking.ALL_SET
5769 self.do_locking = (self.use_locking and
5770 query.IQ_LIVE in self.requested_data)
5771 if self.do_locking:
5772 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5773 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5774 lu.needed_locks[locking.LEVEL_NODE] = []
5775 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5777 self.do_grouplocks = (self.do_locking and
5778 query.IQ_NODES in self.requested_data)
5780 def DeclareLocks(self, lu, level):
5781 if self.do_locking:
5782 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5783 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5785 # Lock all groups used by instances optimistically; this requires going
5786 # via the node before it's locked, requiring verification later on
5787 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5788 set(group_uuid
5789 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5790 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5791 elif level == locking.LEVEL_NODE:
5792 lu._LockInstancesNodes() # pylint: disable=W0212
5794 @staticmethod
5795 def _CheckGroupLocks(lu):
5796 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5797 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5799 # Check if node groups for locked instances are still correct
5800 for instance_name in owned_instances:
5801 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5803 def _GetQueryData(self, lu):
5804 """Computes the list of instances and their attributes.
5807 if self.do_grouplocks:
5808 self._CheckGroupLocks(lu)
5810 cluster = lu.cfg.GetClusterInfo()
5811 all_info = lu.cfg.GetAllInstancesInfo()
5813 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5815 instance_list = [all_info[name] for name in instance_names]
5816 nodes = frozenset(itertools.chain(*(inst.all_nodes
5817 for inst in instance_list)))
5818 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5819 bad_nodes = []
5820 offline_nodes = []
5821 wrongnode_inst = set()
5823 # Gather data as requested
5824 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5825 live_data = {}
5826 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5827 for name in nodes:
5828 result = node_data[name]
5829 if result.offline:
5830 # offline nodes will be in both lists
5831 assert result.fail_msg
5832 offline_nodes.append(name)
5833 if result.fail_msg:
5834 bad_nodes.append(name)
5835 elif result.payload:
5836 for inst in result.payload:
5837 if inst in all_info:
5838 if all_info[inst].primary_node == name:
5839 live_data.update(result.payload)
5840 else:
5841 wrongnode_inst.add(inst)
5842 else:
5843 # orphan instance; we don't list it here as we don't
5844 # handle this case yet in the output of instance listing
5845 logging.warning("Orphan instance '%s' found on node %s",
5846 inst, name)
5847 # else no instance is alive
5848 else:
5849 live_data = {}
5851 if query.IQ_DISKUSAGE in self.requested_data:
5852 gmi = ganeti.masterd.instance
5853 disk_usage = dict((inst.name,
5854 gmi.ComputeDiskSize(inst.disk_template,
5855 [{constants.IDISK_SIZE: disk.size}
5856 for disk in inst.disks]))
5857 for inst in instance_list)
5858 else:
5859 disk_usage = None
5861 if query.IQ_CONSOLE in self.requested_data:
5862 consinfo = {}
5863 for inst in instance_list:
5864 if inst.name in live_data:
5865 # Instance is running
5866 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5867 else:
5868 consinfo[inst.name] = None
5869 assert set(consinfo.keys()) == set(instance_names)
5870 else:
5871 consinfo = None
5873 if query.IQ_NODES in self.requested_data:
5874 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5875 instance_list)))
5876 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5877 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5878 for uuid in set(map(operator.attrgetter("group"),
5879 nodes.values())))
5880 else:
5881 nodes = None
5882 groups = None
5884 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5885 disk_usage, offline_nodes, bad_nodes,
5886 live_data, wrongnode_inst, consinfo,
5887 nodes, groups)
5890 class LUQuery(NoHooksLU):
5891 """Query for resources/items of a certain kind.
5894 # pylint: disable=W0142
5895 REQ_BGL = False
5897 def CheckArguments(self):
5898 qcls = _GetQueryImplementation(self.op.what)
5900 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5902 def ExpandNames(self):
5903 self.impl.ExpandNames(self)
5905 def DeclareLocks(self, level):
5906 self.impl.DeclareLocks(self, level)
5908 def Exec(self, feedback_fn):
5909 return self.impl.NewStyleQuery(self)
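# Assumed usage (sketch): LUQuery is normally reached through an OpQuery
# opcode; the opcode and QR_* constants are real, the field list is an
# arbitrary example.
#
#   op = opcodes.OpQuery(what=constants.QR_INSTANCE,
#                        fields=["name", "status"],
#                        qfilter=None)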
5912 class LUQueryFields(NoHooksLU):
5913 """Query for resources/items of a certain kind.
5916 # pylint: disable=W0142
5917 REQ_BGL = False
5919 def CheckArguments(self):
5920 self.qcls = _GetQueryImplementation(self.op.what)
5922 def ExpandNames(self):
5923 self.needed_locks = {}
5925 def Exec(self, feedback_fn):
5926 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
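# Assumed usage (sketch): listing the available fields for a resource kind;
# fields=None requests all of them.
#
#   op = opcodes.OpQueryFields(what=constants.QR_NODE, fields=None)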
5929 class LUNodeModifyStorage(NoHooksLU):
5930 """Logical unit for modifying a storage volume on a node.
5935 def CheckArguments(self):
5936 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5938 storage_type = self.op.storage_type
5940 try:
5941 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5942 except KeyError:
5943 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5944 " modified" % storage_type,
5947 diff = set(self.op.changes.keys()) - modifiable
5948 if diff:
5949 raise errors.OpPrereqError("The following fields can not be modified for"
5950 " storage units of type '%s': %r" %
5951 (storage_type, list(diff)),
5952 errors.ECODE_INVAL)
5954 def ExpandNames(self):
5955 self.needed_locks = {
5956 locking.LEVEL_NODE: self.op.node_name,
5957 }
5959 def Exec(self, feedback_fn):
5960 """Computes the list of nodes and their attributes.
5963 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5964 result = self.rpc.call_storage_modify(self.op.node_name,
5965 self.op.storage_type, st_args,
5966 self.op.name, self.op.changes)
5967 result.Raise("Failed to modify storage unit '%s' on %s" %
5968 (self.op.name, self.op.node_name))
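# Assumed usage (sketch): marking an LVM physical volume as no longer
# allocatable; ST_LVM_PV and SF_ALLOCATABLE are real constants, the node
# and volume names are hypothetical.
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sdb1",
#                                    changes={constants.SF_ALLOCATABLE: False})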
5971 class LUNodeAdd(LogicalUnit):
5972 """Logical unit for adding node to the cluster.
5976 HTYPE = constants.HTYPE_NODE
5977 _NFLAGS = ["master_capable", "vm_capable"]
5979 def CheckArguments(self):
5980 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5981 # validate/normalize the node name
5982 self.hostname = netutils.GetHostname(name=self.op.node_name,
5983 family=self.primary_ip_family)
5984 self.op.node_name = self.hostname.name
5986 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5987 raise errors.OpPrereqError("Cannot readd the master node",
5988 errors.ECODE_STATE)
5990 if self.op.readd and self.op.group:
5991 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5992 " being readded", errors.ECODE_INVAL)
5994 def BuildHooksEnv(self):
5995 """Build hooks env.
5997 This will run on all nodes before, and on all nodes + the new node after.
6000 return {
6001 "OP_TARGET": self.op.node_name,
6002 "NODE_NAME": self.op.node_name,
6003 "NODE_PIP": self.op.primary_ip,
6004 "NODE_SIP": self.op.secondary_ip,
6005 "MASTER_CAPABLE": str(self.op.master_capable),
6006 "VM_CAPABLE": str(self.op.vm_capable),
6009 def BuildHooksNodes(self):
6010 """Build hooks nodes.
6013 # Exclude added node
6014 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6015 post_nodes = pre_nodes + [self.op.node_name, ]
6017 return (pre_nodes, post_nodes)
6019 def CheckPrereq(self):
6020 """Check prerequisites.
6023 - the new node is not already in the config
6024 - it is resolvable
6025 - its parameters (single/dual homed) match the cluster
6027 Any errors are signaled by raising errors.OpPrereqError.
6030 cfg = self.cfg
6031 hostname = self.hostname
6032 node = hostname.name
6033 primary_ip = self.op.primary_ip = hostname.ip
6034 if self.op.secondary_ip is None:
6035 if self.primary_ip_family == netutils.IP6Address.family:
6036 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6037 " IPv4 address must be given as secondary",
6039 self.op.secondary_ip = primary_ip
6041 secondary_ip = self.op.secondary_ip
6042 if not netutils.IP4Address.IsValid(secondary_ip):
6043 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6044 " address" % secondary_ip, errors.ECODE_INVAL)
6046 node_list = cfg.GetNodeList()
6047 if not self.op.readd and node in node_list:
6048 raise errors.OpPrereqError("Node %s is already in the configuration" %
6049 node, errors.ECODE_EXISTS)
6050 elif self.op.readd and node not in node_list:
6051 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6052 errors.ECODE_NOENT)
6054 self.changed_primary_ip = False
6056 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6057 if self.op.readd and node == existing_node_name:
6058 if existing_node.secondary_ip != secondary_ip:
6059 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6060 " address configuration as before",
6062 if existing_node.primary_ip != primary_ip:
6063 self.changed_primary_ip = True
6065 continue
6067 if (existing_node.primary_ip == primary_ip or
6068 existing_node.secondary_ip == primary_ip or
6069 existing_node.primary_ip == secondary_ip or
6070 existing_node.secondary_ip == secondary_ip):
6071 raise errors.OpPrereqError("New node ip address(es) conflict with"
6072 " existing node %s" % existing_node.name,
6073 errors.ECODE_NOTUNIQUE)
6075 # After this 'if' block, None is no longer a valid value for the
6076 # _capable op attributes
6077 if self.op.readd:
6078 old_node = self.cfg.GetNodeInfo(node)
6079 assert old_node is not None, "Can't retrieve locked node %s" % node
6080 for attr in self._NFLAGS:
6081 if getattr(self.op, attr) is None:
6082 setattr(self.op, attr, getattr(old_node, attr))
6083 else:
6084 for attr in self._NFLAGS:
6085 if getattr(self.op, attr) is None:
6086 setattr(self.op, attr, True)
6088 if self.op.readd and not self.op.vm_capable:
6089 pri, sec = cfg.GetNodeInstances(node)
6090 if pri or sec:
6091 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6092 " flag set to false, but it already holds"
6093 " instances" % node,
6096 # check that the type of the node (single versus dual homed) is the
6097 # same as for the master
6098 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6099 master_singlehomed = myself.secondary_ip == myself.primary_ip
6100 newbie_singlehomed = secondary_ip == primary_ip
6101 if master_singlehomed != newbie_singlehomed:
6102 if master_singlehomed:
6103 raise errors.OpPrereqError("The master has no secondary ip but the"
6104 " new node has one",
6107 raise errors.OpPrereqError("The master has a secondary ip but the"
6108 " new node doesn't have one",
6111 # checks reachability
6112 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6113 raise errors.OpPrereqError("Node not reachable by ping",
6114 errors.ECODE_ENVIRON)
6116 if not newbie_singlehomed:
6117 # check reachability from my secondary ip to newbie's secondary ip
6118 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6119 source=myself.secondary_ip):
6120 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6121 " based ping to node daemon port",
6122 errors.ECODE_ENVIRON)
6124 if self.op.readd:
6125 exceptions = [node]
6126 else:
6127 exceptions = []
6129 if self.op.master_capable:
6130 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6132 self.master_candidate = False
6134 if self.op.readd:
6135 self.new_node = old_node
6136 else:
6137 node_group = cfg.LookupNodeGroup(self.op.group)
6138 self.new_node = objects.Node(name=node,
6139 primary_ip=primary_ip,
6140 secondary_ip=secondary_ip,
6141 master_candidate=self.master_candidate,
6142 offline=False, drained=False,
6143 group=node_group, ndparams={})
6145 if self.op.ndparams:
6146 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6148 if self.op.hv_state:
6149 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6151 if self.op.disk_state:
6152 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6154 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6155 # it a property on the base class.
6156 rpcrunner = rpc.DnsOnlyRunner()
6157 result = rpcrunner.call_version([node])[node]
6158 result.Raise("Can't get version information from node %s" % node)
6159 if constants.PROTOCOL_VERSION == result.payload:
6160 logging.info("Communication to node %s fine, sw version %s match",
6161 node, result.payload)
6163 raise errors.OpPrereqError("Version mismatch master version %s,"
6164 " node version %s" %
6165 (constants.PROTOCOL_VERSION, result.payload),
6166 errors.ECODE_ENVIRON)
6168 vg_name = cfg.GetVGName()
6169 if vg_name is not None:
6170 vparams = {constants.NV_PVLIST: [vg_name]}
6171 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6172 if self.op.ndparams:
6173 excl_stor = self.op.ndparams.get(constants.ND_EXCLUSIVE_STORAGE,
6174 excl_stor)
6175 cname = self.cfg.GetClusterName()
6176 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6177 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6178 if errmsgs:
6179 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6180 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6182 def Exec(self, feedback_fn):
6183 """Adds the new node to the cluster.
6186 new_node = self.new_node
6187 node = new_node.name
6189 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6190 "Not owning BGL"
6192 # We are adding a new node, so we assume it's powered
6193 new_node.powered = True
6195 # for re-adds, reset the offline/drained/master-candidate flags;
6196 # we need to reset here, otherwise offline would prevent RPC calls
6197 # later in the procedure; this also means that if the re-add
6198 # fails, we are left with a non-offlined, broken node
6199 if self.op.readd:
6200 new_node.drained = new_node.offline = False # pylint: disable=W0201
6201 self.LogInfo("Readding a node, the offline/drained flags were reset")
6202 # if we demote the node, we do cleanup later in the procedure
6203 new_node.master_candidate = self.master_candidate
6204 if self.changed_primary_ip:
6205 new_node.primary_ip = self.op.primary_ip
6207 # copy the master/vm_capable flags
6208 for attr in self._NFLAGS:
6209 setattr(new_node, attr, getattr(self.op, attr))
6211 # notify the user about any possible mc promotion
6212 if new_node.master_candidate:
6213 self.LogInfo("Node will be a master candidate")
6215 if self.op.ndparams:
6216 new_node.ndparams = self.op.ndparams
6218 new_node.ndparams = {}
6220 if self.op.hv_state:
6221 new_node.hv_state_static = self.new_hv_state
6223 if self.op.disk_state:
6224 new_node.disk_state_static = self.new_disk_state
6226 # Add node to our /etc/hosts, and add key to known_hosts
6227 if self.cfg.GetClusterInfo().modify_etc_hosts:
6228 master_node = self.cfg.GetMasterNode()
6229 result = self.rpc.call_etc_hosts_modify(master_node,
6230 constants.ETC_HOSTS_ADD,
6231 self.hostname.name,
6232 self.hostname.ip)
6233 result.Raise("Can't update hosts file with new host data")
6235 if new_node.secondary_ip != new_node.primary_ip:
6236 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6237 False)
6239 node_verify_list = [self.cfg.GetMasterNode()]
6240 node_verify_param = {
6241 constants.NV_NODELIST: ([node], {}),
6242 # TODO: do a node-net-test as well?
6243 }
6245 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6246 self.cfg.GetClusterName())
6247 for verifier in node_verify_list:
6248 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6249 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6250 if nl_payload:
6251 for failed in nl_payload:
6252 feedback_fn("ssh/hostname verification failed"
6253 " (checking from %s): %s" %
6254 (verifier, nl_payload[failed]))
6255 raise errors.OpExecError("ssh/hostname verification failed")
6257 if self.op.readd:
6258 _RedistributeAncillaryFiles(self)
6259 self.context.ReaddNode(new_node)
6260 # make sure we redistribute the config
6261 self.cfg.Update(new_node, feedback_fn)
6262 # and make sure the new node will not have old files around
6263 if not new_node.master_candidate:
6264 result = self.rpc.call_node_demote_from_mc(new_node.name)
6265 msg = result.fail_msg
6266 if msg:
6267 self.LogWarning("Node failed to demote itself from master"
6268 " candidate status: %s" % msg)
6270 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6271 additional_vm=self.op.vm_capable)
6272 self.context.AddNode(new_node, self.proc.GetECId())
6275 class LUNodeSetParams(LogicalUnit):
6276 """Modifies the parameters of a node.
6278 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6279 to the node role (as _ROLE_*)
6280 @cvar _R2F: a dictionary from node role to tuples of flags
6281 @cvar _FLAGS: a list of attribute names corresponding to the flags
6284 HPATH = "node-modify"
6285 HTYPE = constants.HTYPE_NODE
6286 REQ_BGL = False
6287 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6288 _F2R = {
6289 (True, False, False): _ROLE_CANDIDATE,
6290 (False, True, False): _ROLE_DRAINED,
6291 (False, False, True): _ROLE_OFFLINE,
6292 (False, False, False): _ROLE_REGULAR,
6293 }
6294 _R2F = dict((v, k) for k, v in _F2R.items())
6295 _FLAGS = ["master_candidate", "drained", "offline"]
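# Illustrative mapping check (sketch): at most one flag may be set, and the
# all-False tuple is the regular role, e.g.:
#
#   >>> LUNodeSetParams._F2R[(True, False, False)] == \
#   ...     LUNodeSetParams._ROLE_CANDIDATE
#   True
#   >>> LUNodeSetParams._R2F[LUNodeSetParams._ROLE_REGULAR]
#   (False, False, False)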
6297 def CheckArguments(self):
6298 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6299 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6300 self.op.master_capable, self.op.vm_capable,
6301 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6302 self.op.disk_state]
6303 if all_mods.count(None) == len(all_mods):
6304 raise errors.OpPrereqError("Please pass at least one modification",
6305 errors.ECODE_INVAL)
6306 if all_mods.count(True) > 1:
6307 raise errors.OpPrereqError("Can't set the node into more than one"
6308 " state at the same time",
6311 # Boolean value that tells us whether we might be demoting from MC
6312 self.might_demote = (self.op.master_candidate is False or
6313 self.op.offline is True or
6314 self.op.drained is True or
6315 self.op.master_capable is False)
6317 if self.op.secondary_ip:
6318 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6319 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6320 " address" % self.op.secondary_ip,
6323 self.lock_all = self.op.auto_promote and self.might_demote
6324 self.lock_instances = self.op.secondary_ip is not None
6326 def _InstanceFilter(self, instance):
6327 """Filter for getting affected instances.
6330 return (instance.disk_template in constants.DTS_INT_MIRROR and
6331 self.op.node_name in instance.all_nodes)
6333 def ExpandNames(self):
6334 if self.lock_all:
6335 self.needed_locks = {
6336 locking.LEVEL_NODE: locking.ALL_SET,
6338 # Block allocations when all nodes are locked
6339 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6340 }
6341 else:
6342 self.needed_locks = {
6343 locking.LEVEL_NODE: self.op.node_name,
6344 }
6346 # Since modifying a node can have severe effects on currently running
6347 # operations the resource lock is at least acquired in shared mode
6348 self.needed_locks[locking.LEVEL_NODE_RES] = \
6349 self.needed_locks[locking.LEVEL_NODE]
6351 # Get all locks except nodes in shared mode; they are not used for anything
6352 # but read-only access
6353 self.share_locks = _ShareAll()
6354 self.share_locks[locking.LEVEL_NODE] = 0
6355 self.share_locks[locking.LEVEL_NODE_RES] = 0
6356 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6358 if self.lock_instances:
6359 self.needed_locks[locking.LEVEL_INSTANCE] = \
6360 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6362 def BuildHooksEnv(self):
6363 """Build hooks env.
6365 This runs on the master node.
6368 return {
6369 "OP_TARGET": self.op.node_name,
6370 "MASTER_CANDIDATE": str(self.op.master_candidate),
6371 "OFFLINE": str(self.op.offline),
6372 "DRAINED": str(self.op.drained),
6373 "MASTER_CAPABLE": str(self.op.master_capable),
6374 "VM_CAPABLE": str(self.op.vm_capable),
6377 def BuildHooksNodes(self):
6378 """Build hooks nodes.
6381 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6382 return (nl, nl)
6384 def CheckPrereq(self):
6385 """Check prerequisites.
6387 This only checks the instance list against the existing names.
6390 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6392 if self.lock_instances:
6393 affected_instances = \
6394 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6396 # Verify instance locks
6397 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6398 wanted_instances = frozenset(affected_instances.keys())
6399 if wanted_instances - owned_instances:
6400 raise errors.OpPrereqError("Instances affected by changing node %s's"
6401 " secondary IP address have changed since"
6402 " locks were acquired, wanted '%s', have"
6403 " '%s'; retry the operation" %
6405 utils.CommaJoin(wanted_instances),
6406 utils.CommaJoin(owned_instances)),
6407 errors.ECODE_STATE)
6408 else:
6409 affected_instances = None
6411 if (self.op.master_candidate is not None or
6412 self.op.drained is not None or
6413 self.op.offline is not None):
6414 # we can't change the master's node flags
6415 if self.op.node_name == self.cfg.GetMasterNode():
6416 raise errors.OpPrereqError("The master role can be changed"
6417 " only via master-failover",
6420 if self.op.master_candidate and not node.master_capable:
6421 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6422 " it a master candidate" % node.name,
6425 if self.op.vm_capable is False:
6426 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6427 if ipri or isec:
6428 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6429 " the vm_capable flag" % node.name,
6432 if node.master_candidate and self.might_demote and not self.lock_all:
6433 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6434 # check if after removing the current node, we're missing master
6435 # candidates
6436 (mc_remaining, mc_should, _) = \
6437 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6438 if mc_remaining < mc_should:
6439 raise errors.OpPrereqError("Not enough master candidates, please"
6440 " pass auto promote option to allow"
6441 " promotion (--auto-promote or RAPI"
6442 " auto_promote=True)", errors.ECODE_STATE)
6444 self.old_flags = old_flags = (node.master_candidate,
6445 node.drained, node.offline)
6446 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6447 self.old_role = old_role = self._F2R[old_flags]
6449 # Check for ineffective changes
6450 for attr in self._FLAGS:
6451 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6452 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6453 setattr(self.op, attr, None)
6455 # Past this point, any flag change to False means a transition
6456 # away from the respective state, as only real changes are kept
6458 # TODO: We might query the real power state if it supports OOB
6459 if _SupportsOob(self.cfg, node):
6460 if self.op.offline is False and not (node.powered or
6461 self.op.powered is True):
6462 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6463 " offline status can be reset") %
6464 self.op.node_name, errors.ECODE_STATE)
6465 elif self.op.powered is not None:
6466 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6467 " as it does not support out-of-band"
6468 " handling") % self.op.node_name,
6471 # If we're being deofflined/drained, we'll MC ourself if needed
6472 if (self.op.drained is False or self.op.offline is False or
6473 (self.op.master_capable and not node.master_capable)):
6474 if _DecideSelfPromotion(self):
6475 self.op.master_candidate = True
6476 self.LogInfo("Auto-promoting node to master candidate")
6478 # If we're no longer master capable, we'll demote ourselves from MC
6479 if self.op.master_capable is False and node.master_candidate:
6480 self.LogInfo("Demoting from master candidate")
6481 self.op.master_candidate = False
6483 # Compute new role
6484 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6485 if self.op.master_candidate:
6486 new_role = self._ROLE_CANDIDATE
6487 elif self.op.drained:
6488 new_role = self._ROLE_DRAINED
6489 elif self.op.offline:
6490 new_role = self._ROLE_OFFLINE
6491 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6492 # False is still in new flags, which means we're un-setting (the
6493 # only) True flag
6494 new_role = self._ROLE_REGULAR
6495 else: # no new flags, nothing, keep old role
6496 new_role = old_role
6498 self.new_role = new_role
6500 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6501 # Trying to transition out of offline status
6502 result = self.rpc.call_version([node.name])[node.name]
6503 if result.fail_msg:
6504 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6505 " to report its version: %s" %
6506 (node.name, result.fail_msg),
6507 errors.ECODE_STATE)
6508 else:
6509 self.LogWarning("Transitioning node from offline to online state"
6510 " without using re-add. Please make sure the node"
6513 # When changing the secondary ip, verify if this is a single-homed to
6514 # multi-homed transition or vice versa, and apply the relevant
6515 # restrictions.
6516 if self.op.secondary_ip:
6517 # Ok even without locking, because this can't be changed by any LU
6518 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6519 master_singlehomed = master.secondary_ip == master.primary_ip
6520 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6521 if self.op.force and node.name == master.name:
6522 self.LogWarning("Transitioning from single-homed to multi-homed"
6523 " cluster; all nodes will require a secondary IP"
6526 raise errors.OpPrereqError("Changing the secondary ip on a"
6527 " single-homed cluster requires the"
6528 " --force option to be passed, and the"
6529 " target node to be the master",
6531 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6532 if self.op.force and node.name == master.name:
6533 self.LogWarning("Transitioning from multi-homed to single-homed"
6534 " cluster; secondary IP addresses will have to be"
6537 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6538 " same as the primary IP on a multi-homed"
6539 " cluster, unless the --force option is"
6540 " passed, and the target node is the"
6541 " master", errors.ECODE_INVAL)
6543 assert not (frozenset(affected_instances) -
6544 self.owned_locks(locking.LEVEL_INSTANCE))
6546 if node.offline:
6547 if affected_instances:
6548 msg = ("Cannot change secondary IP address: offline node has"
6549 " instances (%s) configured to use it" %
6550 utils.CommaJoin(affected_instances.keys()))
6551 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6552 else:
6553 # On online nodes, check that no instances are running, and that
6554 # the node has the new ip and we can reach it.
6555 for instance in affected_instances.values():
6556 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6557 msg="cannot change secondary ip")
6559 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6560 if master.name != node.name:
6561 # check reachability from master secondary ip to new secondary ip
6562 if not netutils.TcpPing(self.op.secondary_ip,
6563 constants.DEFAULT_NODED_PORT,
6564 source=master.secondary_ip):
6565 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6566 " based ping to node daemon port",
6567 errors.ECODE_ENVIRON)
6569 if self.op.ndparams:
6570 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6571 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6572 self.new_ndparams = new_ndparams
6574 if self.op.hv_state:
6575 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6576 self.node.hv_state_static)
6578 if self.op.disk_state:
6579 self.new_disk_state = \
6580 _MergeAndVerifyDiskState(self.op.disk_state,
6581 self.node.disk_state_static)
6583 def Exec(self, feedback_fn):
6584 """Modifies a node.
6587 node = self.node
6588 old_role = self.old_role
6589 new_role = self.new_role
6591 result = []
6593 if self.op.ndparams:
6594 node.ndparams = self.new_ndparams
6596 if self.op.powered is not None:
6597 node.powered = self.op.powered
6599 if self.op.hv_state:
6600 node.hv_state_static = self.new_hv_state
6602 if self.op.disk_state:
6603 node.disk_state_static = self.new_disk_state
6605 for attr in ["master_capable", "vm_capable"]:
6606 val = getattr(self.op, attr)
6607 if val is not None:
6608 setattr(node, attr, val)
6609 result.append((attr, str(val)))
6611 if new_role != old_role:
6612 # Tell the node to demote itself, if no longer MC and not offline
6613 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6614 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6615 if msg:
6616 self.LogWarning("Node failed to demote itself: %s", msg)
6618 new_flags = self._R2F[new_role]
6619 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6620 if of != nf:
6621 result.append((desc, str(nf)))
6622 (node.master_candidate, node.drained, node.offline) = new_flags
6624 # we locked all nodes, we adjust the CP before updating this node
6625 if self.lock_all:
6626 _AdjustCandidatePool(self, [node.name])
6628 if self.op.secondary_ip:
6629 node.secondary_ip = self.op.secondary_ip
6630 result.append(("secondary_ip", self.op.secondary_ip))
6632 # this will trigger configuration file update, if needed
6633 self.cfg.Update(node, feedback_fn)
6635 # this will trigger job queue propagation or cleanup if the mc
6636 # flag changed
6637 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6638 self.context.ReaddNode(node)
6640 return result
6643 class LUNodePowercycle(NoHooksLU):
6644 """Powercycles a node.
6649 def CheckArguments(self):
6650 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6651 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6652 raise errors.OpPrereqError("The node is the master and the force"
6653 " parameter was not set",
6656 def ExpandNames(self):
6657 """Locking for PowercycleNode.
6659 This is a last-resort option and shouldn't block on other
6660 jobs. Therefore, we grab no locks.
6663 self.needed_locks = {}
6665 def Exec(self, feedback_fn):
6666 """Reboots a node.
6669 result = self.rpc.call_node_powercycle(self.op.node_name,
6670 self.cfg.GetHypervisorType())
6671 result.Raise("Failed to schedule the reboot")
6672 return result.payload
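# Assumed usage (sketch): powercycling a non-master node; the opcode and its
# parameters are real, the node name is hypothetical. force=True would be
# needed for the master node, as checked in CheckArguments above.
#
#   op = opcodes.OpNodePowercycle(node_name="node2.example.com", force=False)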
6675 class LUClusterQuery(NoHooksLU):
6676 """Query cluster configuration.
6681 def ExpandNames(self):
6682 self.needed_locks = {}
6684 def Exec(self, feedback_fn):
6685 """Return cluster config.
6688 cluster = self.cfg.GetClusterInfo()
6689 os_hvp = {}
6691 # Filter just for enabled hypervisors
6692 for os_name, hv_dict in cluster.os_hvp.items():
6693 os_hvp[os_name] = {}
6694 for hv_name, hv_params in hv_dict.items():
6695 if hv_name in cluster.enabled_hypervisors:
6696 os_hvp[os_name][hv_name] = hv_params
6698 # Convert ip_family to ip_version
6699 primary_ip_version = constants.IP4_VERSION
6700 if cluster.primary_ip_family == netutils.IP6Address.family:
6701 primary_ip_version = constants.IP6_VERSION
6703 result = {
6704 "software_version": constants.RELEASE_VERSION,
6705 "protocol_version": constants.PROTOCOL_VERSION,
6706 "config_version": constants.CONFIG_VERSION,
6707 "os_api_version": max(constants.OS_API_VERSIONS),
6708 "export_version": constants.EXPORT_VERSION,
6709 "architecture": runtime.GetArchInfo(),
6710 "name": cluster.cluster_name,
6711 "master": cluster.master_node,
6712 "default_hypervisor": cluster.primary_hypervisor,
6713 "enabled_hypervisors": cluster.enabled_hypervisors,
6714 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6715 for hypervisor_name in cluster.enabled_hypervisors]),
6716 "os_hvp": os_hvp,
6717 "beparams": cluster.beparams,
6718 "osparams": cluster.osparams,
6719 "ipolicy": cluster.ipolicy,
6720 "nicparams": cluster.nicparams,
6721 "ndparams": cluster.ndparams,
6722 "diskparams": cluster.diskparams,
6723 "candidate_pool_size": cluster.candidate_pool_size,
6724 "master_netdev": cluster.master_netdev,
6725 "master_netmask": cluster.master_netmask,
6726 "use_external_mip_script": cluster.use_external_mip_script,
6727 "volume_group_name": cluster.volume_group_name,
6728 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6729 "file_storage_dir": cluster.file_storage_dir,
6730 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6731 "maintain_node_health": cluster.maintain_node_health,
6732 "ctime": cluster.ctime,
6733 "mtime": cluster.mtime,
6734 "uuid": cluster.uuid,
6735 "tags": list(cluster.GetTags()),
6736 "uid_pool": cluster.uid_pool,
6737 "default_iallocator": cluster.default_iallocator,
6738 "reserved_lvs": cluster.reserved_lvs,
6739 "primary_ip_version": primary_ip_version,
6740 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6741 "hidden_os": cluster.hidden_os,
6742 "blacklisted_os": cluster.blacklisted_os,
6748 class LUClusterConfigQuery(NoHooksLU):
6749 """Return configuration values.
6754 def CheckArguments(self):
6755 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6757 def ExpandNames(self):
6758 self.cq.ExpandNames(self)
6760 def DeclareLocks(self, level):
6761 self.cq.DeclareLocks(self, level)
6763 def Exec(self, feedback_fn):
6764 result = self.cq.OldStyleQuery(self)
6766 assert len(result) == 1
6768 return result[0]
6771 class _ClusterQuery(_QueryBase):
6772 FIELDS = query.CLUSTER_FIELDS
6774 #: Do not sort (there is only one item)
6775 SORT_FIELD = None
6777 def ExpandNames(self, lu):
6778 lu.needed_locks = {}
6780 # The following variables interact with _QueryBase._GetNames
6781 self.wanted = locking.ALL_SET
6782 self.do_locking = self.use_locking
6784 if self.do_locking:
6785 raise errors.OpPrereqError("Can not use locking for cluster queries",
6786 errors.ECODE_INVAL)
6788 def DeclareLocks(self, lu, level):
6789 pass
6791 def _GetQueryData(self, lu):
6792 """Computes the list of nodes and their attributes.
6795 # Locking is not used
6796 assert not (compat.any(lu.glm.is_owned(level)
6797 for level in locking.LEVELS
6798 if level != locking.LEVEL_CLUSTER) or
6799 self.do_locking or self.use_locking)
6801 if query.CQ_CONFIG in self.requested_data:
6802 cluster = lu.cfg.GetClusterInfo()
6803 else:
6804 cluster = NotImplemented
6806 if query.CQ_QUEUE_DRAINED in self.requested_data:
6807 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6808 else:
6809 drain_flag = NotImplemented
6811 if query.CQ_WATCHER_PAUSE in self.requested_data:
6812 master_name = lu.cfg.GetMasterNode()
6814 result = lu.rpc.call_get_watcher_pause(master_name)
6815 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6816 master_name)
6818 watcher_pause = result.payload
6819 else:
6820 watcher_pause = NotImplemented
6822 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6825 class LUInstanceActivateDisks(NoHooksLU):
6826 """Bring up an instance's disks.
6831 def ExpandNames(self):
6832 self._ExpandAndLockInstance()
6833 self.needed_locks[locking.LEVEL_NODE] = []
6834 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6836 def DeclareLocks(self, level):
6837 if level == locking.LEVEL_NODE:
6838 self._LockInstancesNodes()
6840 def CheckPrereq(self):
6841 """Check prerequisites.
6843 This checks that the instance is in the cluster.
6846 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6847 assert self.instance is not None, \
6848 "Cannot retrieve locked instance %s" % self.op.instance_name
6849 _CheckNodeOnline(self, self.instance.primary_node)
6851 def Exec(self, feedback_fn):
6852 """Activate the disks.
6855 disks_ok, disks_info = \
6856 _AssembleInstanceDisks(self, self.instance,
6857 ignore_size=self.op.ignore_size)
6858 if not disks_ok:
6859 raise errors.OpExecError("Cannot activate block devices")
6861 if self.op.wait_for_sync:
6862 if not _WaitForSync(self, self.instance):
6863 raise errors.OpExecError("Some disks of the instance are degraded!")
6865 return disks_info
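# Assumed usage (sketch): the value returned above is the device_info list
# built by _AssembleInstanceDisks below, i.e. triples of
# (node, iv_name, device_path). An opcode driving this LU could look like
# (instance name hypothetical):
#
#   op = opcodes.OpInstanceActivateDisks(instance_name="inst1.example.com",
#                                        ignore_size=False)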
6868 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6869 ignore_size=False):
6870 """Prepare the block devices for an instance.
6872 This sets up the block devices on all nodes.
6874 @type lu: L{LogicalUnit}
6875 @param lu: the logical unit on whose behalf we execute
6876 @type instance: L{objects.Instance}
6877 @param instance: the instance for whose disks we assemble
6878 @type disks: list of L{objects.Disk} or None
6879 @param disks: which disks to assemble (or all, if None)
6880 @type ignore_secondaries: boolean
6881 @param ignore_secondaries: if true, errors on secondary nodes
6882 won't result in an error return from the function
6883 @type ignore_size: boolean
6884 @param ignore_size: if true, the current known size of the disk
6885 will not be used during the disk activation, useful for cases
6886 when the size is wrong
6887 @return: False if the operation failed, otherwise a list of
6888 (host, instance_visible_name, node_visible_name)
6889 with the mapping from node devices to instance devices
6892 device_info = []
6893 disks_ok = True
6894 iname = instance.name
6895 disks = _ExpandCheckDisks(instance, disks)
6897 # With the two passes mechanism we try to reduce the window of
6898 # opportunity for the race condition of switching DRBD to primary
6899 # before handshaking occurred, but we do not eliminate it
6901 # The proper fix would be to wait (with some limits) until the
6902 # connection has been made and drbd transitions from WFConnection
6903 # into any other network-connected state (Connected, SyncTarget,
6904 # SyncSource, etc.)
6906 # 1st pass, assemble on all nodes in secondary mode
6907 for idx, inst_disk in enumerate(disks):
6908 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6909 if ignore_size:
6910 node_disk = node_disk.Copy()
6911 node_disk.UnsetSize()
6912 lu.cfg.SetDiskID(node_disk, node)
6913 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6914 False, idx)
6915 msg = result.fail_msg
6916 if msg:
6917 is_offline_secondary = (node in instance.secondary_nodes and
6918 result.offline)
6919 lu.LogWarning("Could not prepare block device %s on node %s"
6920 " (is_primary=False, pass=1): %s",
6921 inst_disk.iv_name, node, msg)
6922 if not (ignore_secondaries or is_offline_secondary):
6923 disks_ok = False
6925 # FIXME: race condition on drbd migration to primary
6927 # 2nd pass, do only the primary node
6928 for idx, inst_disk in enumerate(disks):
6929 dev_path = None
6931 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6932 if node != instance.primary_node:
6933 continue
6934 if ignore_size:
6935 node_disk = node_disk.Copy()
6936 node_disk.UnsetSize()
6937 lu.cfg.SetDiskID(node_disk, node)
6938 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6939 True, idx)
6940 msg = result.fail_msg
6941 if msg:
6942 lu.LogWarning("Could not prepare block device %s on node %s"
6943 " (is_primary=True, pass=2): %s",
6944 inst_disk.iv_name, node, msg)
6945 disks_ok = False
6946 else:
6947 dev_path = result.payload
6949 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6951 # leave the disks configured for the primary node
6952 # this is a workaround that would be fixed better by
6953 # improving the logical/physical id handling
6954 for disk in disks:
6955 lu.cfg.SetDiskID(disk, instance.primary_node)
6957 return disks_ok, device_info
6960 def _StartInstanceDisks(lu, instance, force):
6961 """Start the disks of an instance.
6964 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6965 ignore_secondaries=force)
6966 if not disks_ok:
6967 _ShutdownInstanceDisks(lu, instance)
6968 if force is not None and not force:
6969 lu.LogWarning("",
6970 hint=("If the message above refers to a secondary node,"
6971 " you can retry the operation using '--force'"))
6972 raise errors.OpExecError("Disk consistency error")
6975 class LUInstanceDeactivateDisks(NoHooksLU):
6976 """Shutdown an instance's disks.
6981 def ExpandNames(self):
6982 self._ExpandAndLockInstance()
6983 self.needed_locks[locking.LEVEL_NODE] = []
6984 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6986 def DeclareLocks(self, level):
6987 if level == locking.LEVEL_NODE:
6988 self._LockInstancesNodes()
6990 def CheckPrereq(self):
6991 """Check prerequisites.
6993 This checks that the instance is in the cluster.
6996 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6997 assert self.instance is not None, \
6998 "Cannot retrieve locked instance %s" % self.op.instance_name
7000 def Exec(self, feedback_fn):
7001 """Deactivate the disks
7004 instance = self.instance
7005 if self.op.force:
7006 _ShutdownInstanceDisks(self, instance)
7007 else:
7008 _SafeShutdownInstanceDisks(self, instance)
7011 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7012 """Shutdown block devices of an instance.
7014 This function checks if an instance is running, before calling
7015 _ShutdownInstanceDisks.
7018 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7019 _ShutdownInstanceDisks(lu, instance, disks=disks)
7022 def _ExpandCheckDisks(instance, disks):
7023 """Return the instance disks selected by the disks list
7025 @type disks: list of L{objects.Disk} or None
7026 @param disks: selected disks
7027 @rtype: list of L{objects.Disk}
7028 @return: selected instance disks to act on
7031 if disks is None:
7032 return instance.disks
7033 else:
7034 if not set(disks).issubset(instance.disks):
7035 raise errors.ProgrammerError("Can only act on disks belonging to the"
7036 " target instance")
7037 return disks
7040 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7041 """Shutdown block devices of an instance.
7043 This does the shutdown on all nodes of the instance.
7045 If ignore_primary is true, errors on the primary node are
7046 ignored.
7049 all_result = True
7050 disks = _ExpandCheckDisks(instance, disks)
7052 for disk in disks:
7053 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7054 lu.cfg.SetDiskID(top_disk, node)
7055 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7056 msg = result.fail_msg
7057 if msg:
7058 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7059 disk.iv_name, node, msg)
7060 if ((node == instance.primary_node and not ignore_primary) or
7061 (node != instance.primary_node and not result.offline)):
7062 all_result = False
7064 return all_result
7066 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7067 """Checks if a node has enough free memory.
7069 This function checks if a given node has the needed amount of free
7070 memory. In case the node has less memory or we cannot get the
7071 information from the node, this function raises an OpPrereqError
7072 exception.
7074 @type lu: C{LogicalUnit}
7075 @param lu: a logical unit from which we get configuration data
7076 @type node: C{str}
7077 @param node: the node to check
7078 @type reason: C{str}
7079 @param reason: string to use in the error message
7080 @type requested: C{int}
7081 @param requested: the amount of memory in MiB to check for
7082 @type hypervisor_name: C{str}
7083 @param hypervisor_name: the hypervisor to ask for memory stats
7085 @return: node current free memory
7086 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7087 we cannot check the node
7090 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7091 nodeinfo[node].Raise("Can't get data from node %s" % node,
7092 prereq=True, ecode=errors.ECODE_ENVIRON)
7093 (_, _, (hv_info, )) = nodeinfo[node].payload
7095 free_mem = hv_info.get("memory_free", None)
7096 if not isinstance(free_mem, int):
7097 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7098 " was '%s'" % (node, free_mem),
7099 errors.ECODE_ENVIRON)
7100 if requested > free_mem:
7101 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7102 " needed %s MiB, available %s MiB" %
7103 (node, reason, requested, free_mem),
7104 errors.ECODE_NORES)
7105 return free_mem
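# Illustrative call (sketch, mirroring the usage in LUInstanceStartup below):
# reserving 4096 MiB on an instance's primary node before starting it; the
# amount is an arbitrary example.
#
#   free_mem = _CheckNodeFreeMemory(self, instance.primary_node,
#                                   "starting instance %s" % instance.name,
#                                   4096, instance.hypervisor)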
7108 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7109 """Checks if nodes have enough free disk space in all the VGs.
7111 This function checks if all given nodes have the needed amount of
7112 free disk. In case any node has less disk or we cannot get the
7113 information from the node, this function raises an OpPrereqError
7114 exception.
7116 @type lu: C{LogicalUnit}
7117 @param lu: a logical unit from which we get configuration data
7118 @type nodenames: C{list}
7119 @param nodenames: the list of node names to check
7120 @type req_sizes: C{dict}
7121 @param req_sizes: the hash of vg and corresponding amount of disk in
7122 MiB to check for
7123 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7124 or we cannot check the node
7127 for vg, req_size in req_sizes.items():
7128 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7131 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7132 """Checks if nodes have enough free disk space in the specified VG.
7134 This function checks if all given nodes have the needed amount of
7135 free disk. In case any node has less disk or we cannot get the
7136 information from the node, this function raises an OpPrereqError
7137 exception.
7139 @type lu: C{LogicalUnit}
7140 @param lu: a logical unit from which we get configuration data
7141 @type nodenames: C{list}
7142 @param nodenames: the list of node names to check
7143 @type vg: C{str}
7144 @param vg: the volume group to check
7145 @type requested: C{int}
7146 @param requested: the amount of disk in MiB to check for
7147 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7148 or we cannot check the node
7151 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7152 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7153 for node in nodenames:
7154 info = nodeinfo[node]
7155 info.Raise("Cannot get current information from node %s" % node,
7156 prereq=True, ecode=errors.ECODE_ENVIRON)
7157 (_, (vg_info, ), _) = info.payload
7158 vg_free = vg_info.get("vg_free", None)
7159 if not isinstance(vg_free, int):
7160 raise errors.OpPrereqError("Can't compute free disk space on node"
7161 " %s for vg %s, result was '%s'" %
7162 (node, vg, vg_free), errors.ECODE_ENVIRON)
7163 if requested > vg_free:
7164 raise errors.OpPrereqError("Not enough disk space on target node %s"
7165 " vg %s: required %d MiB, available %d MiB" %
7166 (node, vg, requested, vg_free),
7167 errors.ECODE_NORES)
7170 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7171 """Checks if nodes have enough physical CPUs
7173 This function checks if all given nodes have the needed number of
7174 physical CPUs. In case any node has less CPUs or we cannot get the
7175 information from the node, this function raises an OpPrereqError
7176 exception.
7178 @type lu: C{LogicalUnit}
7179 @param lu: a logical unit from which we get configuration data
7180 @type nodenames: C{list}
7181 @param nodenames: the list of node names to check
7182 @type requested: C{int}
7183 @param requested: the minimum acceptable number of physical CPUs
7184 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7185 or we cannot check the node
7188 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7189 for node in nodenames:
7190 info = nodeinfo[node]
7191 info.Raise("Cannot get current information from node %s" % node,
7192 prereq=True, ecode=errors.ECODE_ENVIRON)
7193 (_, _, (hv_info, )) = info.payload
7194 num_cpus = hv_info.get("cpu_total", None)
7195 if not isinstance(num_cpus, int):
7196 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7197 " on node %s, result was '%s'" %
7198 (node, num_cpus), errors.ECODE_ENVIRON)
7199 if requested > num_cpus:
7200 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7201 "required" % (node, num_cpus, requested),
7205 class LUInstanceStartup(LogicalUnit):
7206 """Starts an instance.
7209 HPATH = "instance-start"
7210 HTYPE = constants.HTYPE_INSTANCE
7211 REQ_BGL = False
7213 def CheckArguments(self):
7214 # extra beparams
7215 if self.op.beparams:
7216 # fill the beparams dict
7217 objects.UpgradeBeParams(self.op.beparams)
7218 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7220 def ExpandNames(self):
7221 self._ExpandAndLockInstance()
7222 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7224 def DeclareLocks(self, level):
7225 if level == locking.LEVEL_NODE_RES:
7226 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7228 def BuildHooksEnv(self):
7229 """Build hooks env.
7231 This runs on master, primary and secondary nodes of the instance.
7234 env = {
7235 "FORCE": self.op.force,
7238 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7240 return env
7242 def BuildHooksNodes(self):
7243 """Build hooks nodes.
7246 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7247 return (nl, nl)
7249 def CheckPrereq(self):
7250 """Check prerequisites.
7252 This checks that the instance is in the cluster.
7255 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7256 assert self.instance is not None, \
7257 "Cannot retrieve locked instance %s" % self.op.instance_name
7260 if self.op.hvparams:
7261 # check hypervisor parameter syntax (locally)
7262 cluster = self.cfg.GetClusterInfo()
7263 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7264 filled_hvp = cluster.FillHV(instance)
7265 filled_hvp.update(self.op.hvparams)
7266 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7267 hv_type.CheckParameterSyntax(filled_hvp)
7268 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7270 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7272 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7274 if self.primary_offline and self.op.ignore_offline_nodes:
7275 self.LogWarning("Ignoring offline primary node")
7277 if self.op.hvparams or self.op.beparams:
7278 self.LogWarning("Overridden parameters are ignored")
7279 else:
7280 _CheckNodeOnline(self, instance.primary_node)
7282 bep = self.cfg.GetClusterInfo().FillBE(instance)
7283 bep.update(self.op.beparams)
7285 # check bridges existence
7286 _CheckInstanceBridgesExist(self, instance)
7288 remote_info = self.rpc.call_instance_info(instance.primary_node,
7289 instance.name,
7290 instance.hypervisor)
7291 remote_info.Raise("Error checking node %s" % instance.primary_node,
7292 prereq=True, ecode=errors.ECODE_ENVIRON)
7293 if not remote_info.payload: # not running already
7294 _CheckNodeFreeMemory(self, instance.primary_node,
7295 "starting instance %s" % instance.name,
7296 bep[constants.BE_MINMEM], instance.hypervisor)
7298 def Exec(self, feedback_fn):
7299 """Start the instance.
7302 instance = self.instance
7303 force = self.op.force
7305 if not self.op.no_remember:
7306 self.cfg.MarkInstanceUp(instance.name)
7308 if self.primary_offline:
7309 assert self.op.ignore_offline_nodes
7310 self.LogInfo("Primary node offline, marked instance as started")
7311 else:
7312 node_current = instance.primary_node
7314 _StartInstanceDisks(self, instance, force)
7316 result = \
7317 self.rpc.call_instance_start(node_current,
7318 (instance, self.op.hvparams,
7319 self.op.beparams),
7320 self.op.startup_paused)
7321 msg = result.fail_msg
7322 if msg:
7323 _ShutdownInstanceDisks(self, instance)
7324 raise errors.OpExecError("Could not start instance: %s" % msg)
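# Assumed usage (sketch): the corresponding opcode; the instance name is
# hypothetical and hvparams/beparams overrides are optional.
#
#   op = opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                                  force=False)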
7327 class LUInstanceReboot(LogicalUnit):
7328 """Reboot an instance.
7331 HPATH = "instance-reboot"
7332 HTYPE = constants.HTYPE_INSTANCE
7333 REQ_BGL = False
7335 def ExpandNames(self):
7336 self._ExpandAndLockInstance()
7338 def BuildHooksEnv(self):
7339 """Build hooks env.
7341 This runs on master, primary and secondary nodes of the instance.
7344 env = {
7345 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7346 "REBOOT_TYPE": self.op.reboot_type,
7347 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7350 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7352 return env
7354 def BuildHooksNodes(self):
7355 """Build hooks nodes.
7358 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7359 return (nl, nl)
7361 def CheckPrereq(self):
7362 """Check prerequisites.
7364 This checks that the instance is in the cluster.
7367 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7368 assert self.instance is not None, \
7369 "Cannot retrieve locked instance %s" % self.op.instance_name
7370 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7371 _CheckNodeOnline(self, instance.primary_node)
7373 # check bridges existence
7374 _CheckInstanceBridgesExist(self, instance)
7376 def Exec(self, feedback_fn):
7377 """Reboot the instance.
7380 instance = self.instance
7381 ignore_secondaries = self.op.ignore_secondaries
7382 reboot_type = self.op.reboot_type
7384 remote_info = self.rpc.call_instance_info(instance.primary_node,
7385 instance.name,
7386 instance.hypervisor)
7387 remote_info.Raise("Error checking node %s" % instance.primary_node)
7388 instance_running = bool(remote_info.payload)
7390 node_current = instance.primary_node
7392 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7393 constants.INSTANCE_REBOOT_HARD]:
7394 for disk in instance.disks:
7395 self.cfg.SetDiskID(disk, node_current)
7396 result = self.rpc.call_instance_reboot(node_current, instance,
7397 reboot_type,
7398 self.op.shutdown_timeout)
7399 result.Raise("Could not reboot instance")
7400 else:
7401 if instance_running:
7402 result = self.rpc.call_instance_shutdown(node_current, instance,
7403 self.op.shutdown_timeout)
7404 result.Raise("Could not shutdown instance for full reboot")
7405 _ShutdownInstanceDisks(self, instance)
7406 else:
7407 self.LogInfo("Instance %s was already stopped, starting now",
7408 instance.name)
7409 _StartInstanceDisks(self, instance, ignore_secondaries)
7410 result = self.rpc.call_instance_start(node_current,
7411 (instance, None, None), False)
7412 msg = result.fail_msg
7413 if msg:
7414 _ShutdownInstanceDisks(self, instance)
7415 raise errors.OpExecError("Could not start instance for"
7416 " full reboot: %s" % msg)
7418 self.cfg.MarkInstanceUp(instance.name)
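# Assumed usage (sketch): a hard reboot via the corresponding opcode; the
# constant and parameters are real, the instance name is hypothetical.
#
#   op = opcodes.OpInstanceReboot(instance_name="inst1.example.com",
#                                 reboot_type=constants.INSTANCE_REBOOT_HARD,
#                                 ignore_secondaries=False)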
7421 class LUInstanceShutdown(LogicalUnit):
7422 """Shutdown an instance.
7425 HPATH = "instance-stop"
7426 HTYPE = constants.HTYPE_INSTANCE
7427 REQ_BGL = False
7429 def ExpandNames(self):
7430 self._ExpandAndLockInstance()
7432 def BuildHooksEnv(self):
7433 """Build hooks env.
7435 This runs on master, primary and secondary nodes of the instance.
7438 env = _BuildInstanceHookEnvByObject(self, self.instance)
7439 env["TIMEOUT"] = self.op.timeout
7440 return env
7442 def BuildHooksNodes(self):
7443 """Build hooks nodes.
7446 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7447 return (nl, nl)
7449 def CheckPrereq(self):
7450 """Check prerequisites.
7452 This checks that the instance is in the cluster.
7455 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7456 assert self.instance is not None, \
7457 "Cannot retrieve locked instance %s" % self.op.instance_name
7459 if not self.op.force:
7460 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7461 else:
7462 self.LogWarning("Ignoring offline instance check")
7464 self.primary_offline = \
7465 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7467 if self.primary_offline and self.op.ignore_offline_nodes:
7468 self.LogWarning("Ignoring offline primary node")
7469 else:
7470 _CheckNodeOnline(self, self.instance.primary_node)
7472 def Exec(self, feedback_fn):
7473 """Shutdown the instance.
7476 instance = self.instance
7477 node_current = instance.primary_node
7478 timeout = self.op.timeout
7480 # If the instance is offline we shouldn't mark it as down, as that
7481 # resets the offline flag.
7482 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7483 self.cfg.MarkInstanceDown(instance.name)
7485 if self.primary_offline:
7486 assert self.op.ignore_offline_nodes
7487 self.LogInfo("Primary node offline, marked instance as stopped")
7488 else:
7489 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7490 msg = result.fail_msg
7492 self.LogWarning("Could not shutdown instance: %s", msg)
7494 _ShutdownInstanceDisks(self, instance)
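# A matching opcode submission could look like this (a sketch with
# hypothetical values; field names assumed from the opcodes module):
#
#   op = opcodes.OpInstanceShutdown(instance_name="inst1.example.com",
#                                   timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
#                                   ignore_offline_nodes=False)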
7497 class LUInstanceReinstall(LogicalUnit):
7498 """Reinstall an instance.
7501 HPATH = "instance-reinstall"
7502 HTYPE = constants.HTYPE_INSTANCE
7505 def ExpandNames(self):
7506 self._ExpandAndLockInstance()
7508 def BuildHooksEnv(self):
7511 This runs on master, primary and secondary nodes of the instance.
7514 return _BuildInstanceHookEnvByObject(self, self.instance)
7516 def BuildHooksNodes(self):
7517 """Build hooks nodes.
7520 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7521 return (nl, nl)
7523 def CheckPrereq(self):
7524 """Check prerequisites.
7526 This checks that the instance is in the cluster and is not running.
7529 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7530 assert instance is not None, \
7531 "Cannot retrieve locked instance %s" % self.op.instance_name
7532 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7533 " offline, cannot reinstall")
7535 if instance.disk_template == constants.DT_DISKLESS:
7536 raise errors.OpPrereqError("Instance '%s' has no disks" %
7537 self.op.instance_name,
7538 errors.ECODE_INVAL)
7539 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7541 if self.op.os_type is not None:
7543 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7544 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7545 instance_os = self.op.os_type
7546 else:
7547 instance_os = instance.os
7549 nodelist = list(instance.all_nodes)
7551 if self.op.osparams:
7552 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7553 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7554 self.os_inst = i_osdict # the new dict (without defaults)
7555 else:
7556 self.os_inst = {}
7558 self.instance = instance
7560 def Exec(self, feedback_fn):
7561 """Reinstall the instance.
7564 inst = self.instance
7566 if self.op.os_type is not None:
7567 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7568 inst.os = self.op.os_type
7569 # Write to configuration
7570 self.cfg.Update(inst, feedback_fn)
7572 _StartInstanceDisks(self, inst, None)
7573 try:
7574 feedback_fn("Running the instance OS create scripts...")
7575 # FIXME: pass debug option from opcode to backend
7576 result = self.rpc.call_instance_os_add(inst.primary_node,
7577 (inst, self.os_inst), True,
7578 self.op.debug_level)
7579 result.Raise("Could not install OS for instance %s on node %s" %
7580 (inst.name, inst.primary_node))
7581 finally:
7582 _ShutdownInstanceDisks(self, inst)
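# Sketch of driving this LU with an OS change (hypothetical values; the OS
# name and variant depend on what is installed on the nodes, and the osparams
# keys depend on the OS definition):
#
#   op = opcodes.OpInstanceReinstall(instance_name="inst1.example.com",
#                                    os_type="debootstrap+default",
#                                    osparams={})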
7585 class LUInstanceRecreateDisks(LogicalUnit):
7586 """Recreate an instance's missing disks.
7589 HPATH = "instance-recreate-disks"
7590 HTYPE = constants.HTYPE_INSTANCE
7593 _MODIFYABLE = compat.UniqueFrozenset([
7594 constants.IDISK_SIZE,
7595 constants.IDISK_MODE,
7596 ])
7598 # New or changed disk parameters may have different semantics
7599 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7600 constants.IDISK_ADOPT,
7602 # TODO: Implement support changing VG while recreating
7603 constants.IDISK_VG,
7604 constants.IDISK_METAVG,
7605 constants.IDISK_PROVIDER,
7606 ]))
7608 def _RunAllocator(self):
7609 """Run the allocator based on input opcode.
7612 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7615 # The allocator should actually run in "relocate" mode, but current
7616 # allocators don't support relocating all the nodes of an instance at
7617 # the same time. As a workaround we use "allocate" mode, but this is
7618 # suboptimal for two reasons:
7619 # - The instance name passed to the allocator is present in the list of
7620 # existing instances, so there could be a conflict within the
7621 # internal structures of the allocator. This doesn't happen with the
7622 # current allocators, but it's a liability.
7623 # - The allocator counts the resources used by the instance twice: once
7624 # because the instance exists already, and once because it tries to
7625 # allocate a new instance.
7626 # The allocator could choose some of the nodes on which the instance is
7627 # running, but that's not a problem. If the instance nodes are broken,
7628 # they should already be marked as drained or offline, and hence
7629 # skipped by the allocator. If instance disks have been lost for other
7630 # reasons, then recreating the disks on the same nodes should be fine.
7631 disk_template = self.instance.disk_template
7632 spindle_use = be_full[constants.BE_SPINDLE_USE]
7633 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7634 disk_template=disk_template,
7635 tags=list(self.instance.GetTags()),
7636 os=self.instance.os,
7637 nics=[{}],
7638 vcpus=be_full[constants.BE_VCPUS],
7639 memory=be_full[constants.BE_MAXMEM],
7640 spindle_use=spindle_use,
7641 disks=[{constants.IDISK_SIZE: d.size,
7642 constants.IDISK_MODE: d.mode}
7643 for d in self.instance.disks],
7644 hypervisor=self.instance.hypervisor,
7645 node_whitelist=None)
7646 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7648 ial.Run(self.op.iallocator)
7650 assert req.RequiredNodes() == len(self.instance.all_nodes)
7652 if not ial.success:
7653 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7654 " %s" % (self.op.iallocator, ial.info),
7655 errors.ECODE_NORES)
7657 self.op.nodes = ial.result
7658 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7659 self.op.instance_name, self.op.iallocator,
7660 utils.CommaJoin(ial.result))
7662 def CheckArguments(self):
7663 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7664 # Normalize and convert deprecated list of disk indices
7665 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7667 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7668 if duplicates:
7669 raise errors.OpPrereqError("Some disks have been specified more than"
7670 " once: %s" % utils.CommaJoin(duplicates),
7671 errors.ECODE_INVAL)
7673 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7674 # when neither iallocator nor nodes are specified
7675 if self.op.iallocator or self.op.nodes:
7676 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7678 for (idx, params) in self.op.disks:
7679 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7680 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7681 if unsupported:
7682 raise errors.OpPrereqError("Parameters for disk %s try to change"
7683 " unmodifiable parameter(s): %s" %
7684 (idx, utils.CommaJoin(unsupported)),
7685 errors.ECODE_INVAL)
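# After CheckArguments, self.op.disks is normalized to (index, params) pairs;
# e.g. the deprecated form [0, 2] becomes [(0, {}), (2, {})], and a size
# override for disk 0 would be passed as (hypothetical value):
#
#   [(0, {constants.IDISK_SIZE: 2048})]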
7687 def ExpandNames(self):
7688 self._ExpandAndLockInstance()
7689 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7691 if self.op.nodes:
7692 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7693 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7694 else:
7695 self.needed_locks[locking.LEVEL_NODE] = []
7696 if self.op.iallocator:
7697 # iallocator will select a new node in the same group
7698 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7699 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7701 self.needed_locks[locking.LEVEL_NODE_RES] = []
7703 def DeclareLocks(self, level):
7704 if level == locking.LEVEL_NODEGROUP:
7705 assert self.op.iallocator is not None
7706 assert not self.op.nodes
7707 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7708 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7709 # Lock the primary group used by the instance optimistically; this
7710 # requires going via the node before it's locked, requiring
7711 # verification later on
7712 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7713 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7715 elif level == locking.LEVEL_NODE:
7716 # If an allocator is used, then we lock all the nodes in the current
7717 # instance group, as we don't know yet which ones will be selected;
7718 # if we replace the nodes without using an allocator, locks are
7719 # already declared in ExpandNames; otherwise, we need to lock all the
7720 # instance nodes for disk re-creation
7721 if self.op.iallocator:
7722 assert not self.op.nodes
7723 assert not self.needed_locks[locking.LEVEL_NODE]
7724 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7726 # Lock member nodes of the group of the primary node
7727 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7728 self.needed_locks[locking.LEVEL_NODE].extend(
7729 self.cfg.GetNodeGroup(group_uuid).members)
7731 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7732 elif not self.op.nodes:
7733 self._LockInstancesNodes(primary_only=False)
7734 elif level == locking.LEVEL_NODE_RES:
7736 self.needed_locks[locking.LEVEL_NODE_RES] = \
7737 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7739 def BuildHooksEnv(self):
7742 This runs on master, primary and secondary nodes of the instance.
7745 return _BuildInstanceHookEnvByObject(self, self.instance)
7747 def BuildHooksNodes(self):
7748 """Build hooks nodes.
7751 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7752 return (nl, nl)
7754 def CheckPrereq(self):
7755 """Check prerequisites.
7757 This checks that the instance is in the cluster and is not running.
7760 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7761 assert instance is not None, \
7762 "Cannot retrieve locked instance %s" % self.op.instance_name
7763 if self.op.nodes:
7764 if len(self.op.nodes) != len(instance.all_nodes):
7765 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7766 " %d replacement nodes were specified" %
7767 (instance.name, len(instance.all_nodes),
7768 len(self.op.nodes)),
7769 errors.ECODE_INVAL)
7770 assert instance.disk_template != constants.DT_DRBD8 or \
7771 len(self.op.nodes) == 2
7772 assert instance.disk_template != constants.DT_PLAIN or \
7773 len(self.op.nodes) == 1
7774 primary_node = self.op.nodes[0]
7775 else:
7776 primary_node = instance.primary_node
7777 if not self.op.iallocator:
7778 _CheckNodeOnline(self, primary_node)
7780 if instance.disk_template == constants.DT_DISKLESS:
7781 raise errors.OpPrereqError("Instance '%s' has no disks" %
7782 self.op.instance_name, errors.ECODE_INVAL)
7784 # Verify if node group locks are still correct
7785 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7786 if owned_groups:
7787 # Node group locks are acquired only for the primary node (and only
7788 # when the allocator is used)
7789 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7790 primary_only=True)
7792 # if we replace nodes *and* the old primary is offline, we don't
7793 # check the instance state
7794 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7795 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7796 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7797 msg="cannot recreate disks")
7799 if self.op.disks:
7800 self.disks = dict(self.op.disks)
7801 else:
7802 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7804 maxidx = max(self.disks.keys())
7805 if maxidx >= len(instance.disks):
7806 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7807 errors.ECODE_INVAL)
7809 if ((self.op.nodes or self.op.iallocator) and
7810 sorted(self.disks.keys()) != range(len(instance.disks))):
7811 raise errors.OpPrereqError("Can't recreate disks partially and"
7812 " change the nodes at the same time",
7813 errors.ECODE_INVAL)
7815 self.instance = instance
7817 if self.op.iallocator:
7818 self._RunAllocator()
7819 # Release unneeded node and node resource locks
7820 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7821 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7822 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7824 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7826 def Exec(self, feedback_fn):
7827 """Recreate the disks.
7830 instance = self.instance
7832 assert (self.owned_locks(locking.LEVEL_NODE) ==
7833 self.owned_locks(locking.LEVEL_NODE_RES))
7835 to_skip = []
7836 mods = [] # keeps track of needed changes
7838 for idx, disk in enumerate(instance.disks):
7839 try:
7840 changes = self.disks[idx]
7841 except KeyError:
7842 # Disk should not be recreated
7843 to_skip.append(idx)
7844 continue
7846 # update secondaries for disks, if needed
7847 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7848 # need to update the nodes and minors
7849 assert len(self.op.nodes) == 2
7850 assert len(disk.logical_id) == 6 # otherwise disk internals
7851 # have changed
7852 (_, _, old_port, _, _, old_secret) = disk.logical_id
7853 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7854 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7855 new_minors[0], new_minors[1], old_secret)
7856 assert len(disk.logical_id) == len(new_id)
7857 else:
7858 new_id = None
7860 mods.append((idx, new_id, changes))
7862 # now that we have passed all asserts above, we can apply the mods
7863 # in a single run (to avoid partial changes)
7864 for idx, new_id, changes in mods:
7865 disk = instance.disks[idx]
7866 if new_id is not None:
7867 assert disk.dev_type == constants.LD_DRBD8
7868 disk.logical_id = new_id
7869 if changes:
7870 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7871 mode=changes.get(constants.IDISK_MODE, None))
7873 # change primary node, if needed
7874 if self.op.nodes:
7875 instance.primary_node = self.op.nodes[0]
7876 self.LogWarning("Changing the instance's nodes, you will have to"
7877 " remove any disks left on the older nodes manually")
7879 if self.op.nodes:
7880 self.cfg.Update(instance, feedback_fn)
7882 # All touched nodes must be locked
7883 mylocks = self.owned_locks(locking.LEVEL_NODE)
7884 assert mylocks.issuperset(frozenset(instance.all_nodes))
7885 _CreateDisks(self, instance, to_skip=to_skip)
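# Typical invocation, recreating all disks on two fresh nodes (hypothetical
# node names; "nodes" and "iallocator" are mutually exclusive, as enforced
# in CheckArguments):
#
#   op = opcodes.OpInstanceRecreateDisks(instance_name="inst1.example.com",
#                                        nodes=["node3.example.com",
#                                               "node4.example.com"])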
7888 class LUInstanceRename(LogicalUnit):
7889 """Rename an instance.
7892 HPATH = "instance-rename"
7893 HTYPE = constants.HTYPE_INSTANCE
7895 def CheckArguments(self):
7899 if self.op.ip_check and not self.op.name_check:
7900 # TODO: make the ip check more flexible and not depend on the name check
7901 raise errors.OpPrereqError("IP address check requires a name check",
7902 errors.ECODE_INVAL)
7904 def BuildHooksEnv(self):
7907 This runs on master, primary and secondary nodes of the instance.
7910 env = _BuildInstanceHookEnvByObject(self, self.instance)
7911 env["INSTANCE_NEW_NAME"] = self.op.new_name
7914 def BuildHooksNodes(self):
7915 """Build hooks nodes.
7918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7919 return (nl, nl)
7921 def CheckPrereq(self):
7922 """Check prerequisites.
7924 This checks that the instance is in the cluster and is not running.
7927 self.op.instance_name = _ExpandInstanceName(self.cfg,
7928 self.op.instance_name)
7929 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7930 assert instance is not None
7931 _CheckNodeOnline(self, instance.primary_node)
7932 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7933 msg="cannot rename")
7934 self.instance = instance
7936 new_name = self.op.new_name
7937 if self.op.name_check:
7938 hostname = _CheckHostnameSane(self, new_name)
7939 new_name = self.op.new_name = hostname.name
7940 if (self.op.ip_check and
7941 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7942 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7943 (hostname.ip, new_name),
7944 errors.ECODE_NOTUNIQUE)
7946 instance_list = self.cfg.GetInstanceList()
7947 if new_name in instance_list and new_name != instance.name:
7948 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7949 new_name, errors.ECODE_EXISTS)
7951 def Exec(self, feedback_fn):
7952 """Rename the instance.
7955 inst = self.instance
7956 old_name = inst.name
7958 rename_file_storage = False
7959 if (inst.disk_template in constants.DTS_FILEBASED and
7960 self.op.new_name != inst.name):
7961 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7962 rename_file_storage = True
7964 self.cfg.RenameInstance(inst.name, self.op.new_name)
7965 # Change the instance lock. This is definitely safe while we hold the BGL.
7966 # Otherwise the new lock would have to be added in acquired mode.
7968 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7969 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7970 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7972 # re-read the instance from the configuration after rename
7973 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7975 if rename_file_storage:
7976 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7977 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7978 old_file_storage_dir,
7979 new_file_storage_dir)
7980 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7981 " (but the instance has been renamed in Ganeti)" %
7982 (inst.primary_node, old_file_storage_dir,
7983 new_file_storage_dir))
7985 _StartInstanceDisks(self, inst, None)
7986 # update info on disks
7987 info = _GetInstanceInfoText(inst)
7988 for (idx, disk) in enumerate(inst.disks):
7989 for node in inst.all_nodes:
7990 self.cfg.SetDiskID(disk, node)
7991 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7992 if result.fail_msg:
7993 self.LogWarning("Error setting info on node %s for disk %s: %s",
7994 node, idx, result.fail_msg)
7996 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7997 old_name, self.op.debug_level)
7998 msg = result.fail_msg
7999 if msg:
8000 msg = ("Could not run OS rename script for instance %s on node %s"
8001 " (but the instance has been renamed in Ganeti): %s" %
8002 (inst.name, inst.primary_node, msg))
8003 self.LogWarning(msg)
8005 _ShutdownInstanceDisks(self, inst)
8007 return inst.name
8010 class LUInstanceRemove(LogicalUnit):
8011 """Remove an instance.
8014 HPATH = "instance-remove"
8015 HTYPE = constants.HTYPE_INSTANCE
8018 def ExpandNames(self):
8019 self._ExpandAndLockInstance()
8020 self.needed_locks[locking.LEVEL_NODE] = []
8021 self.needed_locks[locking.LEVEL_NODE_RES] = []
8022 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8024 def DeclareLocks(self, level):
8025 if level == locking.LEVEL_NODE:
8026 self._LockInstancesNodes()
8027 elif level == locking.LEVEL_NODE_RES:
8029 self.needed_locks[locking.LEVEL_NODE_RES] = \
8030 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8032 def BuildHooksEnv(self):
8035 This runs on master, primary and secondary nodes of the instance.
8038 env = _BuildInstanceHookEnvByObject(self, self.instance)
8039 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8042 def BuildHooksNodes(self):
8043 """Build hooks nodes.
8046 nl = [self.cfg.GetMasterNode()]
8047 nl_post = list(self.instance.all_nodes) + nl
8048 return (nl, nl_post)
8050 def CheckPrereq(self):
8051 """Check prerequisites.
8053 This checks that the instance is in the cluster.
8056 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8057 assert self.instance is not None, \
8058 "Cannot retrieve locked instance %s" % self.op.instance_name
8060 def Exec(self, feedback_fn):
8061 """Remove the instance.
8064 instance = self.instance
8065 logging.info("Shutting down instance %s on node %s",
8066 instance.name, instance.primary_node)
8068 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8069 self.op.shutdown_timeout)
8070 msg = result.fail_msg
8071 if msg:
8072 if self.op.ignore_failures:
8073 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8074 else:
8075 raise errors.OpExecError("Could not shutdown instance %s on"
8076 " node %s: %s" %
8077 (instance.name, instance.primary_node, msg))
8079 assert (self.owned_locks(locking.LEVEL_NODE) ==
8080 self.owned_locks(locking.LEVEL_NODE_RES))
8081 assert not (set(instance.all_nodes) -
8082 self.owned_locks(locking.LEVEL_NODE)), \
8083 "Not owning correct locks"
8085 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8088 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8089 """Utility function to remove an instance.
8092 logging.info("Removing block devices for instance %s", instance.name)
8094 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8095 if not ignore_failures:
8096 raise errors.OpExecError("Can't remove instance's disks")
8097 feedback_fn("Warning: can't remove instance's disks")
8099 logging.info("Removing instance %s out of cluster config", instance.name)
8101 lu.cfg.RemoveInstance(instance.name)
8103 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8104 "Instance lock removal conflict"
8106 # Remove lock for the instance
8107 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8110 class LUInstanceQuery(NoHooksLU):
8111 """Logical unit for querying instances.
8114 # pylint: disable=W0142
8117 def CheckArguments(self):
8118 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8119 self.op.output_fields, self.op.use_locking)
8121 def ExpandNames(self):
8122 self.iq.ExpandNames(self)
8124 def DeclareLocks(self, level):
8125 self.iq.DeclareLocks(self, level)
8127 def Exec(self, feedback_fn):
8128 return self.iq.OldStyleQuery(self)
8131 def _ExpandNamesForMigration(lu):
8132 """Expands names for use with L{TLMigrateInstance}.
8134 @type lu: L{LogicalUnit}
8137 if lu.op.target_node is not None:
8138 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8140 lu.needed_locks[locking.LEVEL_NODE] = []
8141 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8143 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8144 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8146 # The node allocation lock is actually only needed for replicated instances
8147 # (e.g. DRBD8) and if an iallocator is used.
8148 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8151 def _DeclareLocksForMigration(lu, level):
8152 """Declares locks for L{TLMigrateInstance}.
8154 @type lu: L{LogicalUnit}
8155 @param level: Lock level
8158 if level == locking.LEVEL_NODE_ALLOC:
8159 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8161 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8163 # Node locks are already declared here rather than at LEVEL_NODE as we need
8164 # the instance object anyway to declare the node allocation lock.
8165 if instance.disk_template in constants.DTS_EXT_MIRROR:
8166 if lu.op.target_node is None:
8167 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8168 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8169 else:
8170 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8171 lu.op.target_node]
8172 del lu.recalculate_locks[locking.LEVEL_NODE]
8173 else:
8174 lu._LockInstancesNodes() # pylint: disable=W0212
8176 elif level == locking.LEVEL_NODE:
8177 # Node locks are declared together with the node allocation lock
8178 assert (lu.needed_locks[locking.LEVEL_NODE] or
8179 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8181 elif level == locking.LEVEL_NODE_RES:
8183 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8184 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8187 class LUInstanceFailover(LogicalUnit):
8188 """Failover an instance.
8191 HPATH = "instance-failover"
8192 HTYPE = constants.HTYPE_INSTANCE
8195 def CheckArguments(self):
8196 """Check the arguments.
8199 self.iallocator = getattr(self.op, "iallocator", None)
8200 self.target_node = getattr(self.op, "target_node", None)
8202 def ExpandNames(self):
8203 self._ExpandAndLockInstance()
8204 _ExpandNamesForMigration(self)
8206 self._migrater = \
8207 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8208 self.op.ignore_consistency, True,
8209 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8211 self.tasklets = [self._migrater]
8213 def DeclareLocks(self, level):
8214 _DeclareLocksForMigration(self, level)
8216 def BuildHooksEnv(self):
8219 This runs on master, primary and secondary nodes of the instance.
8222 instance = self._migrater.instance
8223 source_node = instance.primary_node
8224 target_node = self.op.target_node
8225 env = {
8226 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8227 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8228 "OLD_PRIMARY": source_node,
8229 "NEW_PRIMARY": target_node,
8230 }
8232 if instance.disk_template in constants.DTS_INT_MIRROR:
8233 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8234 env["NEW_SECONDARY"] = source_node
8235 else:
8236 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8238 env.update(_BuildInstanceHookEnvByObject(self, instance))
8240 return env
8242 def BuildHooksNodes(self):
8243 """Build hooks nodes.
8246 instance = self._migrater.instance
8247 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8248 return (nl, nl + [instance.primary_node])
8251 class LUInstanceMigrate(LogicalUnit):
8252 """Migrate an instance.
8254 This is migration without shutting down, compared to the failover,
8255 which is done with shutdown.
8258 HPATH = "instance-migrate"
8259 HTYPE = constants.HTYPE_INSTANCE
8262 def ExpandNames(self):
8263 self._ExpandAndLockInstance()
8264 _ExpandNamesForMigration(self)
8266 self._migrater = \
8267 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8268 False, self.op.allow_failover, False,
8269 self.op.allow_runtime_changes,
8270 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8271 self.op.ignore_ipolicy)
8273 self.tasklets = [self._migrater]
8275 def DeclareLocks(self, level):
8276 _DeclareLocksForMigration(self, level)
8278 def BuildHooksEnv(self):
8281 This runs on master, primary and secondary nodes of the instance.
8284 instance = self._migrater.instance
8285 source_node = instance.primary_node
8286 target_node = self.op.target_node
8287 env = _BuildInstanceHookEnvByObject(self, instance)
8288 env.update({
8289 "MIGRATE_LIVE": self._migrater.live,
8290 "MIGRATE_CLEANUP": self.op.cleanup,
8291 "OLD_PRIMARY": source_node,
8292 "NEW_PRIMARY": target_node,
8293 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8294 })
8296 if instance.disk_template in constants.DTS_INT_MIRROR:
8297 env["OLD_SECONDARY"] = target_node
8298 env["NEW_SECONDARY"] = source_node
8299 else:
8300 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8302 return env
8304 def BuildHooksNodes(self):
8305 """Build hooks nodes.
8308 instance = self._migrater.instance
8309 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8310 return (nl, nl + [instance.primary_node])
8313 class LUInstanceMove(LogicalUnit):
8314 """Move an instance by data-copying.
8317 HPATH = "instance-move"
8318 HTYPE = constants.HTYPE_INSTANCE
8321 def ExpandNames(self):
8322 self._ExpandAndLockInstance()
8323 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8324 self.op.target_node = target_node
8325 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8326 self.needed_locks[locking.LEVEL_NODE_RES] = []
8327 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8329 def DeclareLocks(self, level):
8330 if level == locking.LEVEL_NODE:
8331 self._LockInstancesNodes(primary_only=True)
8332 elif level == locking.LEVEL_NODE_RES:
8334 self.needed_locks[locking.LEVEL_NODE_RES] = \
8335 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8337 def BuildHooksEnv(self):
8340 This runs on master, primary and secondary nodes of the instance.
8344 "TARGET_NODE": self.op.target_node,
8345 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8347 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8350 def BuildHooksNodes(self):
8351 """Build hooks nodes.
8354 nl = [
8355 self.cfg.GetMasterNode(),
8356 self.instance.primary_node,
8357 self.op.target_node,
8358 ]
8359 return (nl, nl)
8361 def CheckPrereq(self):
8362 """Check prerequisites.
8364 This checks that the instance is in the cluster.
8367 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8368 assert self.instance is not None, \
8369 "Cannot retrieve locked instance %s" % self.op.instance_name
8371 node = self.cfg.GetNodeInfo(self.op.target_node)
8372 assert node is not None, \
8373 "Cannot retrieve locked node %s" % self.op.target_node
8375 self.target_node = target_node = node.name
8377 if target_node == instance.primary_node:
8378 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8379 (instance.name, target_node),
8382 bep = self.cfg.GetClusterInfo().FillBE(instance)
8384 for idx, dsk in enumerate(instance.disks):
8385 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8386 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8387 " cannot copy" % idx, errors.ECODE_STATE)
8389 _CheckNodeOnline(self, target_node)
8390 _CheckNodeNotDrained(self, target_node)
8391 _CheckNodeVmCapable(self, target_node)
8392 cluster = self.cfg.GetClusterInfo()
8393 group_info = self.cfg.GetNodeGroup(node.group)
8394 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8395 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8396 ignore=self.op.ignore_ipolicy)
8398 if instance.admin_state == constants.ADMINST_UP:
8399 # check memory requirements on the secondary node
8400 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8401 instance.name, bep[constants.BE_MAXMEM],
8402 instance.hypervisor)
8403 else:
8404 self.LogInfo("Not checking memory on the secondary node as"
8405 " instance will not be started")
8407 # check bridge existence
8408 _CheckInstanceBridgesExist(self, instance, node=target_node)
8410 def Exec(self, feedback_fn):
8411 """Move an instance.
8413 The move is done by shutting it down on its present node, copying
8414 the data over (slow) and starting it on the new node.
8417 instance = self.instance
8419 source_node = instance.primary_node
8420 target_node = self.target_node
8422 self.LogInfo("Shutting down instance %s on source node %s",
8423 instance.name, source_node)
8425 assert (self.owned_locks(locking.LEVEL_NODE) ==
8426 self.owned_locks(locking.LEVEL_NODE_RES))
8428 result = self.rpc.call_instance_shutdown(source_node, instance,
8429 self.op.shutdown_timeout)
8430 msg = result.fail_msg
8431 if msg:
8432 if self.op.ignore_consistency:
8433 self.LogWarning("Could not shutdown instance %s on node %s."
8434 " Proceeding anyway. Please make sure node"
8435 " %s is down. Error details: %s",
8436 instance.name, source_node, source_node, msg)
8437 else:
8438 raise errors.OpExecError("Could not shutdown instance %s on"
8439 " node %s: %s" %
8440 (instance.name, source_node, msg))
8442 # create the target disks
8443 try:
8444 _CreateDisks(self, instance, target_node=target_node)
8445 except errors.OpExecError:
8446 self.LogWarning("Device creation failed, reverting...")
8447 try:
8448 _RemoveDisks(self, instance, target_node=target_node)
8449 finally:
8450 self.cfg.ReleaseDRBDMinors(instance.name)
8451 raise
8453 cluster_name = self.cfg.GetClusterInfo().cluster_name
8455 errs = []
8456 # activate, get path, copy the data over
8457 for idx, disk in enumerate(instance.disks):
8458 self.LogInfo("Copying data for disk %d", idx)
8459 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8460 instance.name, True, idx)
8461 if result.fail_msg:
8462 self.LogWarning("Can't assemble newly created disk %d: %s",
8463 idx, result.fail_msg)
8464 errs.append(result.fail_msg)
8465 break
8466 dev_path = result.payload
8467 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8468 target_node, dev_path,
8469 cluster_name)
8470 if result.fail_msg:
8471 self.LogWarning("Can't copy data over for disk %d: %s",
8472 idx, result.fail_msg)
8473 errs.append(result.fail_msg)
8474 break
8476 if errs:
8477 self.LogWarning("Some disks failed to copy, aborting")
8478 try:
8479 _RemoveDisks(self, instance, target_node=target_node)
8480 finally:
8481 self.cfg.ReleaseDRBDMinors(instance.name)
8482 raise errors.OpExecError("Errors during disk copy: %s" %
8483 (",".join(errs),))
8485 instance.primary_node = target_node
8486 self.cfg.Update(instance, feedback_fn)
8488 self.LogInfo("Removing the disks on the original node")
8489 _RemoveDisks(self, instance, target_node=source_node)
8491 # Only start the instance if it's marked as up
8492 if instance.admin_state == constants.ADMINST_UP:
8493 self.LogInfo("Starting instance %s on node %s",
8494 instance.name, target_node)
8496 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8497 ignore_secondaries=True)
8498 if not disks_ok:
8499 _ShutdownInstanceDisks(self, instance)
8500 raise errors.OpExecError("Can't activate the instance's disks")
8502 result = self.rpc.call_instance_start(target_node,
8503 (instance, None, None), False)
8504 msg = result.fail_msg
8505 if msg:
8506 _ShutdownInstanceDisks(self, instance)
8507 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8508 (instance.name, target_node, msg))
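# Corresponding opcode sketch (hypothetical names); as CheckPrereq enforces,
# this only works for instances whose disks are plain LVs or file-based:
#
#   op = opcodes.OpInstanceMove(instance_name="inst1.example.com",
#                               target_node="node2.example.com")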
8511 class LUNodeMigrate(LogicalUnit):
8512 """Migrate all instances from a node.
8515 HPATH = "node-migrate"
8516 HTYPE = constants.HTYPE_NODE
8519 def CheckArguments(self):
8520 pass
8522 def ExpandNames(self):
8523 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8525 self.share_locks = _ShareAll()
8526 self.needed_locks = {
8527 locking.LEVEL_NODE: [self.op.node_name],
8528 }
8530 def BuildHooksEnv(self):
8533 This runs on the master, the primary and all the secondaries.
8537 "NODE_NAME": self.op.node_name,
8538 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8541 def BuildHooksNodes(self):
8542 """Build hooks nodes.
8545 nl = [self.cfg.GetMasterNode()]
8546 return (nl, nl)
8548 def CheckPrereq(self):
8549 pass
8551 def Exec(self, feedback_fn):
8552 # Prepare jobs for migration instances
8553 allow_runtime_changes = self.op.allow_runtime_changes
8554 jobs = [
8555 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8556 mode=self.op.mode,
8557 live=self.op.live,
8558 iallocator=self.op.iallocator,
8559 target_node=self.op.target_node,
8560 allow_runtime_changes=allow_runtime_changes,
8561 ignore_ipolicy=self.op.ignore_ipolicy)]
8562 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8564 # TODO: Run iallocator in this opcode and pass correct placement options to
8565 # OpInstanceMigrate. Since other jobs can modify the cluster between
8566 # running the iallocator and the actual migration, a good consistency model
8567 # will have to be found.
8569 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8570 frozenset([self.op.node_name]))
8572 return ResultWithJobs(jobs)
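# The jobs list built above contains one single-opcode job per primary
# instance, e.g. [[OpInstanceMigrate(...)], [OpInstanceMigrate(...)]], so
# each migration is scheduled, executed and reported as an independent job.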
8575 class TLMigrateInstance(Tasklet):
8576 """Tasklet class for instance migration.
8578 @type live: boolean
8579 @ivar live: whether the migration will be done live or non-live;
8580 this variable is initialized only after CheckPrereq has run
8581 @type cleanup: boolean
8582 @ivar cleanup: Whether we cleanup from a failed migration
8583 @type iallocator: string
8584 @ivar iallocator: The iallocator used to determine target_node
8585 @type target_node: string
8586 @ivar target_node: If given, the target_node to reallocate the instance to
8587 @type failover: boolean
8588 @ivar failover: Whether operation results in failover or migration
8589 @type fallback: boolean
8590 @ivar fallback: Whether fallback to failover is allowed if migration not
8591 possible
8592 @type ignore_consistency: boolean
8593 @ivar ignore_consistency: Whether we should ignore consistency between source
8594 and target node
8595 @type shutdown_timeout: int
8596 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
8597 @type ignore_ipolicy: bool
8598 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8603 _MIGRATION_POLL_INTERVAL = 1 # seconds
8604 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8606 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8607 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8608 ignore_ipolicy):
8609 """Initializes this class.
8612 Tasklet.__init__(self, lu)
8615 self.instance_name = instance_name
8616 self.cleanup = cleanup
8617 self.live = False # will be overridden later
8618 self.failover = failover
8619 self.fallback = fallback
8620 self.ignore_consistency = ignore_consistency
8621 self.shutdown_timeout = shutdown_timeout
8622 self.ignore_ipolicy = ignore_ipolicy
8623 self.allow_runtime_changes = allow_runtime_changes
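# For reference, LUInstanceFailover above constructs this tasklet with
# positional arguments mapping roughly as follows (illustrative only; the
# keyword names are shown for clarity, the constructor is positional):
#
#   TLMigrateInstance(self, self.op.instance_name,
#                     cleanup=False, failover=True, fallback=False,
#                     ignore_consistency=self.op.ignore_consistency,
#                     allow_runtime_changes=True,
#                     shutdown_timeout=self.op.shutdown_timeout,
#                     ignore_ipolicy=self.op.ignore_ipolicy)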
8625 def CheckPrereq(self):
8626 """Check prerequisites.
8628 This checks that the instance is in the cluster.
8631 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8632 instance = self.cfg.GetInstanceInfo(instance_name)
8633 assert instance is not None
8634 self.instance = instance
8635 cluster = self.cfg.GetClusterInfo()
8637 if (not self.cleanup and
8638 not instance.admin_state == constants.ADMINST_UP and
8639 not self.failover and self.fallback):
8640 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8641 " switching to failover")
8642 self.failover = True
8644 if instance.disk_template not in constants.DTS_MIRRORED:
8645 if self.failover:
8646 text = "failovers"
8647 else:
8648 text = "migrations"
8649 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8650 " %s" % (instance.disk_template, text),
8651 errors.ECODE_STATE)
8653 if instance.disk_template in constants.DTS_EXT_MIRROR:
8654 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8656 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8658 if self.lu.op.iallocator:
8659 self._RunAllocator()
8660 else:
8661 # We set self.target_node as it is required by
8662 # BuildHooksEnv
8663 self.target_node = self.lu.op.target_node
8665 # Check that the target node is correct in terms of instance policy
8666 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8667 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8668 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8669 group_info)
8670 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8671 ignore=self.ignore_ipolicy)
8673 # self.target_node is already populated, either directly or by the
8674 # iallocator run
8675 target_node = self.target_node
8676 if self.target_node == instance.primary_node:
8677 raise errors.OpPrereqError("Cannot migrate instance %s"
8678 " to its primary (%s)" %
8679 (instance.name, instance.primary_node),
8680 errors.ECODE_INVAL)
8682 if len(self.lu.tasklets) == 1:
8683 # It is safe to release locks only when we're the only tasklet
8684 # in our LU
8685 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8686 keep=[instance.primary_node, self.target_node])
8687 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8689 else:
8690 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8692 secondary_nodes = instance.secondary_nodes
8693 if not secondary_nodes:
8694 raise errors.ConfigurationError("No secondary node but using"
8695 " %s disk template" %
8696 instance.disk_template)
8697 target_node = secondary_nodes[0]
8698 if self.lu.op.iallocator or (self.lu.op.target_node and
8699 self.lu.op.target_node != target_node):
8700 if self.failover:
8701 text = "failed over"
8702 else:
8703 text = "migrated"
8704 raise errors.OpPrereqError("Instances with disk template %s cannot"
8705 " be %s to arbitrary nodes"
8706 " (neither an iallocator nor a target"
8707 " node can be passed)" %
8708 (instance.disk_template, text),
8709 errors.ECODE_INVAL)
8710 nodeinfo = self.cfg.GetNodeInfo(target_node)
8711 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8712 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8713 group_info)
8714 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8715 ignore=self.ignore_ipolicy)
8717 i_be = cluster.FillBE(instance)
8719 # check memory requirements on the secondary node
8720 if (not self.cleanup and
8721 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8722 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8723 "migrating instance %s" %
8724 instance.name,
8725 i_be[constants.BE_MINMEM],
8726 instance.hypervisor)
8727 else:
8728 self.lu.LogInfo("Not checking memory on the secondary node as"
8729 " instance will not be started")
8731 # check if failover must be forced instead of migration
8732 if (not self.cleanup and not self.failover and
8733 i_be[constants.BE_ALWAYS_FAILOVER]):
8734 self.lu.LogInfo("Instance configured to always failover; fallback"
8735 " to migration not possible")
8736 self.failover = True
8738 # check bridge existence
8739 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8741 if not self.cleanup:
8742 _CheckNodeNotDrained(self.lu, target_node)
8743 if not self.failover:
8744 result = self.rpc.call_instance_migratable(instance.primary_node,
8745 instance)
8746 if result.fail_msg and self.fallback:
8747 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8748 " failover")
8749 self.failover = True
8750 else:
8751 result.Raise("Can't migrate, please use failover",
8752 prereq=True, ecode=errors.ECODE_STATE)
8754 assert not (self.failover and self.cleanup)
8756 if not self.failover:
8757 if self.lu.op.live is not None and self.lu.op.mode is not None:
8758 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8759 " parameters are accepted",
8760 errors.ECODE_INVAL)
8761 if self.lu.op.live is not None:
8762 if self.lu.op.live:
8763 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8764 else:
8765 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8766 # reset the 'live' parameter to None so that repeated
8767 # invocations of CheckPrereq do not raise an exception
8768 self.lu.op.live = None
8769 elif self.lu.op.mode is None:
8770 # read the default value from the hypervisor
8771 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8772 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8774 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8775 else:
8776 # Failover is never live
8777 self.live = False
8779 if not (self.failover or self.cleanup):
8780 remote_info = self.rpc.call_instance_info(instance.primary_node,
8781 instance.name,
8782 instance.hypervisor)
8783 remote_info.Raise("Error checking instance on node %s" %
8784 instance.primary_node)
8785 instance_running = bool(remote_info.payload)
8786 if instance_running:
8787 self.current_mem = int(remote_info.payload["memory"])
8789 def _RunAllocator(self):
8790 """Run the allocator based on input opcode.
8793 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8795 # FIXME: add a self.ignore_ipolicy option
8796 req = iallocator.IAReqRelocate(name=self.instance_name,
8797 relocate_from=[self.instance.primary_node])
8798 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8800 ial.Run(self.lu.op.iallocator)
8802 if not ial.success:
8803 raise errors.OpPrereqError("Can't compute nodes using"
8804 " iallocator '%s': %s" %
8805 (self.lu.op.iallocator, ial.info),
8806 errors.ECODE_NORES)
8807 self.target_node = ial.result[0]
8808 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8809 self.instance_name, self.lu.op.iallocator,
8810 utils.CommaJoin(ial.result))
8812 def _WaitUntilSync(self):
8813 """Poll with custom rpc for disk sync.
8815 This uses our own step-based rpc call.
8818 self.feedback_fn("* wait until resync is done")
8819 all_done = False
8820 while not all_done:
8821 all_done = True
8822 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8823 self.nodes_ip,
8824 (self.instance.disks,
8825 self.instance))
8826 min_percent = 100
8827 for node, nres in result.items():
8828 nres.Raise("Cannot resync disks on node %s" % node)
8829 node_done, node_percent = nres.payload
8830 all_done = all_done and node_done
8831 if node_percent is not None:
8832 min_percent = min(min_percent, node_percent)
8833 if not all_done:
8834 if min_percent < 100:
8835 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8836 time.sleep(2)
8838 def _EnsureSecondary(self, node):
8839 """Demote a node to secondary.
8842 self.feedback_fn("* switching node %s to secondary mode" % node)
8844 for dev in self.instance.disks:
8845 self.cfg.SetDiskID(dev, node)
8847 result = self.rpc.call_blockdev_close(node, self.instance.name,
8848 self.instance.disks)
8849 result.Raise("Cannot change disk to secondary on node %s" % node)
8851 def _GoStandalone(self):
8852 """Disconnect from the network.
8855 self.feedback_fn("* changing into standalone mode")
8856 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8857 self.instance.disks)
8858 for node, nres in result.items():
8859 nres.Raise("Cannot disconnect disks node %s" % node)
8861 def _GoReconnect(self, multimaster):
8862 """Reconnect to the network.
8868 msg = "single-master"
8869 self.feedback_fn("* changing disks into %s mode" % msg)
8870 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8871 (self.instance.disks, self.instance),
8872 self.instance.name, multimaster)
8873 for node, nres in result.items():
8874 nres.Raise("Cannot change disks config on node %s" % node)
8876 def _ExecCleanup(self):
8877 """Try to cleanup after a failed migration.
8879 The cleanup is done by:
8880 - check that the instance is running only on one node
8881 (and update the config if needed)
8882 - change disks on its secondary node to secondary
8883 - wait until disks are fully synchronized
8884 - disconnect from the network
8885 - change disks into single-master mode
8886 - wait again until disks are fully synchronized
8889 instance = self.instance
8890 target_node = self.target_node
8891 source_node = self.source_node
8893 # check running on only one node
8894 self.feedback_fn("* checking where the instance actually runs"
8895 " (if this hangs, the hypervisor might be in"
8897 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8898 for node, result in ins_l.items():
8899 result.Raise("Can't contact node %s" % node)
8901 runningon_source = instance.name in ins_l[source_node].payload
8902 runningon_target = instance.name in ins_l[target_node].payload
8904 if runningon_source and runningon_target:
8905 raise errors.OpExecError("Instance seems to be running on two nodes,"
8906 " or the hypervisor is confused; you will have"
8907 " to ensure manually that it runs only on one"
8908 " and restart this operation")
8910 if not (runningon_source or runningon_target):
8911 raise errors.OpExecError("Instance does not seem to be running at all;"
8912 " in this case it's safer to repair by"
8913 " running 'gnt-instance stop' to ensure disk"
8914 " shutdown, and then restarting it")
8916 if runningon_target:
8917 # the migration has actually succeeded, we need to update the config
8918 self.feedback_fn("* instance running on secondary node (%s),"
8919 " updating config" % target_node)
8920 instance.primary_node = target_node
8921 self.cfg.Update(instance, self.feedback_fn)
8922 demoted_node = source_node
8924 self.feedback_fn("* instance confirmed to be running on its"
8925 " primary node (%s)" % source_node)
8926 demoted_node = target_node
8928 if instance.disk_template in constants.DTS_INT_MIRROR:
8929 self._EnsureSecondary(demoted_node)
8931 self._WaitUntilSync()
8932 except errors.OpExecError:
8933 # we ignore here errors, since if the device is standalone, it
8934 # won't be able to sync
8936 self._GoStandalone()
8937 self._GoReconnect(False)
8938 self._WaitUntilSync()
8940 self.feedback_fn("* done")
8942 def _RevertDiskStatus(self):
8943 """Try to revert the disk status after a failed migration.
8946 target_node = self.target_node
8947 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8948 return
8950 try:
8951 self._EnsureSecondary(target_node)
8952 self._GoStandalone()
8953 self._GoReconnect(False)
8954 self._WaitUntilSync()
8955 except errors.OpExecError, err:
8956 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8957 " please try to recover the instance manually;"
8958 " error '%s'" % str(err))
8960 def _AbortMigration(self):
8961 """Call the hypervisor code to abort a started migration.
8964 instance = self.instance
8965 target_node = self.target_node
8966 source_node = self.source_node
8967 migration_info = self.migration_info
8969 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8970 instance,
8971 migration_info,
8972 False)
8973 abort_msg = abort_result.fail_msg
8974 if abort_msg:
8975 logging.error("Aborting migration failed on target node %s: %s",
8976 target_node, abort_msg)
8977 # Don't raise an exception here, as we still have to try to revert the
8978 # disk status, even if this step failed.
8980 abort_result = self.rpc.call_instance_finalize_migration_src(
8981 source_node, instance, False, self.live)
8982 abort_msg = abort_result.fail_msg
8983 if abort_msg:
8984 logging.error("Aborting migration failed on source node %s: %s",
8985 source_node, abort_msg)
8987 def _ExecMigration(self):
8988 """Migrate an instance.
8990 The migrate is done by:
8991 - change the disks into dual-master mode
8992 - wait until disks are fully synchronized again
8993 - migrate the instance
8994 - change disks on the new secondary node (the old primary) to secondary
8995 - wait until disks are fully synchronized
8996 - change disks into single-master mode
8999 instance = self.instance
9000 target_node = self.target_node
9001 source_node = self.source_node
9003 # Check for hypervisor version mismatch and warn the user.
9004 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9005 None, [self.instance.hypervisor], False)
9006 for ninfo in nodeinfo.values():
9007 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9008 ninfo.node)
9009 (_, _, (src_info, )) = nodeinfo[source_node].payload
9010 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9012 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9013 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9014 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9015 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9016 if src_version != dst_version:
9017 self.feedback_fn("* warning: hypervisor version mismatch between"
9018 " source (%s) and target (%s) node" %
9019 (src_version, dst_version))
9021 self.feedback_fn("* checking disk consistency between source and target")
9022 for (idx, dev) in enumerate(instance.disks):
9023 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9024 raise errors.OpExecError("Disk %s is degraded or not fully"
9025 " synchronized on target node,"
9026 " aborting migration" % idx)
9028 if self.current_mem > self.tgt_free_mem:
9029 if not self.allow_runtime_changes:
9030 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9031 " free memory to fit instance %s on target"
9032 " node %s (have %dMB, need %dMB)" %
9033 (instance.name, target_node,
9034 self.tgt_free_mem, self.current_mem))
9035 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9036 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9037 instance,
9038 self.tgt_free_mem)
9039 rpcres.Raise("Cannot modify instance runtime memory")
9041 # First get the migration information from the remote node
9042 result = self.rpc.call_migration_info(source_node, instance)
9043 msg = result.fail_msg
9044 if msg:
9045 log_err = ("Failed fetching source migration information from %s: %s" %
9046 (source_node, msg))
9047 logging.error(log_err)
9048 raise errors.OpExecError(log_err)
9050 self.migration_info = migration_info = result.payload
9052 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9053 # Then switch the disks to master/master mode
9054 self._EnsureSecondary(target_node)
9055 self._GoStandalone()
9056 self._GoReconnect(True)
9057 self._WaitUntilSync()
9059 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9060 result = self.rpc.call_accept_instance(target_node,
9061 instance,
9062 migration_info,
9063 self.nodes_ip[target_node])
9065 msg = result.fail_msg
9066 if msg:
9067 logging.error("Instance pre-migration failed, trying to revert"
9068 " disk status: %s", msg)
9069 self.feedback_fn("Pre-migration failed, aborting")
9070 self._AbortMigration()
9071 self._RevertDiskStatus()
9072 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9073 (instance.name, msg))
9075 self.feedback_fn("* migrating instance to %s" % target_node)
9076 result = self.rpc.call_instance_migrate(source_node, instance,
9077 self.nodes_ip[target_node],
9078 self.live)
9079 msg = result.fail_msg
9080 if msg:
9081 logging.error("Instance migration failed, trying to revert"
9082 " disk status: %s", msg)
9083 self.feedback_fn("Migration failed, aborting")
9084 self._AbortMigration()
9085 self._RevertDiskStatus()
9086 raise errors.OpExecError("Could not migrate instance %s: %s" %
9087 (instance.name, msg))
9089 self.feedback_fn("* starting memory transfer")
9090 last_feedback = time.time()
9091 while True:
9092 result = self.rpc.call_instance_get_migration_status(source_node,
9093 instance)
9094 msg = result.fail_msg
9095 ms = result.payload # MigrationStatus instance
9096 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9097 logging.error("Instance migration failed, trying to revert"
9098 " disk status: %s", msg)
9099 self.feedback_fn("Migration failed, aborting")
9100 self._AbortMigration()
9101 self._RevertDiskStatus()
9102 if not msg:
9103 msg = "hypervisor returned failure"
9104 raise errors.OpExecError("Could not migrate instance %s: %s" %
9105 (instance.name, msg))
9107 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9108 self.feedback_fn("* memory transfer complete")
9109 break
9111 if (utils.TimeoutExpired(last_feedback,
9112 self._MIGRATION_FEEDBACK_INTERVAL) and
9113 ms.transferred_ram is not None):
9114 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9115 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9116 last_feedback = time.time()
9118 time.sleep(self._MIGRATION_POLL_INTERVAL)
9120 result = self.rpc.call_instance_finalize_migration_src(source_node,
9121 instance,
9122 True,
9123 self.live)
9124 msg = result.fail_msg
9125 if msg:
9126 logging.error("Instance migration succeeded, but finalization failed"
9127 " on the source node: %s", msg)
9128 raise errors.OpExecError("Could not finalize instance migration: %s" %
9129 msg)
9131 instance.primary_node = target_node
9133 # distribute new instance config to the other nodes
9134 self.cfg.Update(instance, self.feedback_fn)
9136 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9137 instance,
9138 migration_info,
9139 True)
9140 msg = result.fail_msg
9141 if msg:
9142 logging.error("Instance migration succeeded, but finalization failed"
9143 " on the target node: %s", msg)
9144 raise errors.OpExecError("Could not finalize instance migration: %s" %
9145 msg)
9147 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9148 self._EnsureSecondary(source_node)
9149 self._WaitUntilSync()
9150 self._GoStandalone()
9151 self._GoReconnect(False)
9152 self._WaitUntilSync()
9154 # If the instance's disk template is `rbd' or `ext' and there was a
9155 # successful migration, unmap the device from the source node.
9156 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9157 disks = _ExpandCheckDisks(instance, instance.disks)
9158 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9159 for disk in disks:
9160 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9161 msg = result.fail_msg
9162 if msg:
9163 logging.error("Migration was successful, but couldn't unmap the"
9164 " block device %s on source node %s: %s",
9165 disk.iv_name, source_node, msg)
9166 logging.error("You need to unmap the device %s manually on %s",
9167 disk.iv_name, source_node)
9169 self.feedback_fn("* done")
9171 def _ExecFailover(self):
9172 """Failover an instance.
9174 The failover is done by shutting it down on its present node and
9175 starting it on the secondary.
9178 instance = self.instance
9179 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9181 source_node = instance.primary_node
9182 target_node = self.target_node
9184 if instance.admin_state == constants.ADMINST_UP:
9185 self.feedback_fn("* checking disk consistency between source and target")
9186 for (idx, dev) in enumerate(instance.disks):
9187 # for drbd, these are drbd over lvm
9188 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9189 False):
9190 if primary_node.offline:
9191 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9192 " target node %s" %
9193 (primary_node.name, idx, target_node))
9194 elif not self.ignore_consistency:
9195 raise errors.OpExecError("Disk %s is degraded on target node,"
9196 " aborting failover" % idx)
9197 else:
9198 self.feedback_fn("* not checking disk consistency as instance is not"
9199 " running")
9201 self.feedback_fn("* shutting down instance on source node")
9202 logging.info("Shutting down instance %s on node %s",
9203 instance.name, source_node)
9205 result = self.rpc.call_instance_shutdown(source_node, instance,
9206 self.shutdown_timeout)
9207 msg = result.fail_msg
9208 if msg:
9209 if self.ignore_consistency or primary_node.offline:
9210 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9211 " proceeding anyway; please make sure node"
9212 " %s is down; error details: %s",
9213 instance.name, source_node, source_node, msg)
9214 else:
9215 raise errors.OpExecError("Could not shutdown instance %s on"
9216 " node %s: %s" %
9217 (instance.name, source_node, msg))
9219 self.feedback_fn("* deactivating the instance's disks on source node")
9220 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9221 raise errors.OpExecError("Can't shut down the instance's disks")
9223 instance.primary_node = target_node
9224 # distribute new instance config to the other nodes
9225 self.cfg.Update(instance, self.feedback_fn)
9227 # Only start the instance if it's marked as up
9228 if instance.admin_state == constants.ADMINST_UP:
9229 self.feedback_fn("* activating the instance's disks on target node %s" %
9230 target_node)
9231 logging.info("Starting instance %s on node %s",
9232 instance.name, target_node)
9234 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9235 ignore_secondaries=True)
9236 if not disks_ok:
9237 _ShutdownInstanceDisks(self.lu, instance)
9238 raise errors.OpExecError("Can't activate the instance's disks")
9240 self.feedback_fn("* starting the instance on the target node %s" %
9241 target_node)
9242 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9243 False)
9244 msg = result.fail_msg
9245 if msg:
9246 _ShutdownInstanceDisks(self.lu, instance)
9247 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9248 (instance.name, target_node, msg))
9250 def Exec(self, feedback_fn):
9251 """Perform the migration.
9254 self.feedback_fn = feedback_fn
9255 self.source_node = self.instance.primary_node
9257 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9258 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9259 self.target_node = self.instance.secondary_nodes[0]
9260 # Otherwise self.target_node has been populated either
9261 # directly, or through an iallocator.
9263 self.all_nodes = [self.source_node, self.target_node]
9264 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9265 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9268 feedback_fn("Failover instance %s" % self.instance.name)
9269 self._ExecFailover()
9271 feedback_fn("Migrating instance %s" % self.instance.name)
9274 return self._ExecCleanup()
9276 return self._ExecMigration()
9279 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9280 force_open):
9281 """Wrapper around L{_CreateBlockDevInner}.
9283 This method annotates the root device first.
9286 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9287 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9288 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9289 force_open, excl_stor)
9292 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9293 info, force_open, excl_stor):
9294 """Create a tree of block devices on a given node.
9296 If this device type has to be created on secondaries, create it and
9297 all its children.
9299 If not, just recurse to children keeping the same 'force' value.
9301 @attention: The device has to be annotated already.
9303 @param lu: the lu on whose behalf we execute
9304 @param node: the node on which to create the device
9305 @type instance: L{objects.Instance}
9306 @param instance: the instance which owns the device
9307 @type device: L{objects.Disk}
9308 @param device: the device to create
9309 @type force_create: boolean
9310 @param force_create: whether to force creation of this device; this
9311 will be changed to True whenever we find a device for which
9312 CreateOnSecondary() is true
9313 @param info: the extra 'metadata' we should attach to the device
9314 (this will be represented as a LVM tag)
9315 @type force_open: boolean
9316 @param force_open: this parameter will be passed to the
9317 L{backend.BlockdevCreate} function where it specifies
9318 whether we run on primary or not, and it affects both
9319 the child assembly and the device's own Open() execution
9320 @type excl_stor: boolean
9321 @param excl_stor: Whether exclusive_storage is active for the node
9324 if device.CreateOnSecondary():
9325 force_create = True
9327 if device.children:
9328 for child in device.children:
9329 _CreateBlockDevInner(lu, node, instance, child, force_create,
9330 info, force_open, excl_stor)
9332 if not force_create:
9333 return
9335 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9336 excl_stor)
9339 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9340 excl_stor):
9341 """Create a single block device on a given node.
9343 This will not recurse over children of the device, so they must be
9344 created in advance.
9346 @param lu: the lu on whose behalf we execute
9347 @param node: the node on which to create the device
9348 @type instance: L{objects.Instance}
9349 @param instance: the instance which owns the device
9350 @type device: L{objects.Disk}
9351 @param device: the device to create
9352 @param info: the extra 'metadata' we should attach to the device
9353 (this will be represented as a LVM tag)
9354 @type force_open: boolean
9355 @param force_open: this parameter will be passed to the
9356 L{backend.BlockdevCreate} function where it specifies
9357 whether we run on primary or not, and it affects both
9358 the child assembly and the device's own Open() execution
9359 @type excl_stor: boolean
9360 @param excl_stor: Whether exclusive_storage is active for the node
9363 lu.cfg.SetDiskID(device, node)
9364 result = lu.rpc.call_blockdev_create(node, device, device.size,
9365 instance.name, force_open, info,
9366 excl_stor)
9367 result.Raise("Can't create block device %s on"
9368 " node %s for instance %s" % (device, node, instance.name))
9369 if device.physical_id is None:
9370 device.physical_id = result.payload
9373 def _GenerateUniqueNames(lu, exts):
9374 """Generate a suitable LV name.
9376 This will generate a logical volume name for the given instance.
9379 results = []
9380 for val in exts:
9381 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9382 results.append("%s%s" % (new_id, val))
9383 return results
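# Illustrative sketch (added commentary, not from the original source): each
# extension gets its own config-wide unique ID, so a call such as
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns names of the form ["<id1>.disk0", "<id2>.disk1"], where the exact
# ID format is whatever lu.cfg.GenerateUniqueID emits.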
9386 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9387 iv_name, p_minor, s_minor):
9388 """Generate a drbd8 device complete with its children.
9391 assert len(vgnames) == len(names) == 2
9392 port = lu.cfg.AllocatePort()
9393 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9395 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9396 logical_id=(vgnames[0], names[0]),
9397 params={})
9398 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9399 size=constants.DRBD_META_SIZE,
9400 logical_id=(vgnames[1], names[1]),
9401 params={})
9402 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9403 logical_id=(primary, secondary, port,
9404 p_minor, s_minor,
9405 shared_secret),
9406 children=[dev_data, dev_meta],
9407 iv_name=iv_name, params={})
9408 return drbd_dev
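# Illustrative sketch of the tree built above (values hypothetical): for a
# 1024 MB disk the returned object is roughly
#   Disk(LD_DRBD8, size=1024,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        children=[Disk(LD_LV, size=1024, logical_id=(vg, "<id>_data")),
#                  Disk(LD_LV, size=DRBD_META_SIZE,
#                       logical_id=(metavg, "<id>_meta"))])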
9411 _DISK_TEMPLATE_NAME_PREFIX = {
9412 constants.DT_PLAIN: "",
9413 constants.DT_RBD: ".rbd",
9414 constants.DT_EXT: ".ext",
9415 }
9418 _DISK_TEMPLATE_DEVICE_TYPE = {
9419 constants.DT_PLAIN: constants.LD_LV,
9420 constants.DT_FILE: constants.LD_FILE,
9421 constants.DT_SHARED_FILE: constants.LD_FILE,
9422 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9423 constants.DT_RBD: constants.LD_RBD,
9424 constants.DT_EXT: constants.LD_EXT,
9425 }
9428 def _GenerateDiskTemplate(
9429 lu, template_name, instance_name, primary_node, secondary_nodes,
9430 disk_info, file_storage_dir, file_driver, base_index,
9431 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9432 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9433 """Generate the entire disk layout for a given template type.
9436 vgname = lu.cfg.GetVGName()
9437 disk_count = len(disk_info)
9438 disks = []
9440 if template_name == constants.DT_DISKLESS:
9441 pass
9442 elif template_name == constants.DT_DRBD8:
9443 if len(secondary_nodes) != 1:
9444 raise errors.ProgrammerError("Wrong template configuration")
9445 remote_node = secondary_nodes[0]
9446 minors = lu.cfg.AllocateDRBDMinor(
9447 [primary_node, remote_node] * len(disk_info), instance_name)
9449 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9450 full_disk_params)
9451 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9453 names = []
9454 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9455 for i in range(disk_count)]):
9456 names.append(lv_prefix + "_data")
9457 names.append(lv_prefix + "_meta")
9458 for idx, disk in enumerate(disk_info):
9459 disk_index = idx + base_index
9460 data_vg = disk.get(constants.IDISK_VG, vgname)
9461 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9462 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9463 disk[constants.IDISK_SIZE],
9464 [data_vg, meta_vg],
9465 names[idx * 2:idx * 2 + 2],
9466 "disk/%d" % disk_index,
9467 minors[idx * 2], minors[idx * 2 + 1])
9468 disk_dev.mode = disk[constants.IDISK_MODE]
9469 disks.append(disk_dev)
9470 else:
9471 if secondary_nodes:
9472 raise errors.ProgrammerError("Wrong template configuration")
9474 if template_name == constants.DT_FILE:
9475 _req_file_storage()
9476 elif template_name == constants.DT_SHARED_FILE:
9477 _req_shr_file_storage()
9479 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9480 if name_prefix is None:
9481 names = None
9482 else:
9483 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9484 (name_prefix, base_index + i)
9485 for i in range(disk_count)])
9487 if template_name == constants.DT_PLAIN:
9489 def logical_id_fn(idx, _, disk):
9490 vg = disk.get(constants.IDISK_VG, vgname)
9491 return (vg, names[idx])
9493 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9494 logical_id_fn = \
9495 lambda _, disk_index, disk: (file_driver,
9496 "%s/disk%d" % (file_storage_dir,
9497 disk_index))
9498 elif template_name == constants.DT_BLOCK:
9499 logical_id_fn = \
9500 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9501 disk[constants.IDISK_ADOPT])
9502 elif template_name == constants.DT_RBD:
9503 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9504 elif template_name == constants.DT_EXT:
9505 def logical_id_fn(idx, _, disk):
9506 provider = disk.get(constants.IDISK_PROVIDER, None)
9507 if provider is None:
9508 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9509 " not found" % (constants.DT_EXT,
9510 constants.IDISK_PROVIDER))
9511 return (provider, names[idx])
9512 else:
9513 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9515 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9517 for idx, disk in enumerate(disk_info):
9518 params = {}
9519 # Only for the Ext template add disk_info to params
9520 if template_name == constants.DT_EXT:
9521 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9522 for key in disk:
9523 if key not in constants.IDISK_PARAMS:
9524 params[key] = disk[key]
9525 disk_index = idx + base_index
9526 size = disk[constants.IDISK_SIZE]
9527 feedback_fn("* disk %s, size %s" %
9528 (disk_index, utils.FormatUnit(size, "h")))
9529 disks.append(objects.Disk(dev_type=dev_type, size=size,
9530 logical_id=logical_id_fn(idx, disk_index, disk),
9531 iv_name="disk/%d" % disk_index,
9532 mode=disk[constants.IDISK_MODE],
9533 params=params))
9535 return disks
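# Illustrative summary (example values hypothetical): the logical_id_fn
# chosen above yields, per template,
#   DT_PLAIN:       ("xenvg", "<id>.disk0")
#   DT_FILE/SHARED: (file_driver, "<file_storage_dir>/disk0")
#   DT_BLOCK:       (constants.BLOCKDEV_DRIVER_MANUAL, "<adopted device path>")
#   DT_RBD:         ("rbd", "<id>.rbd.disk0")
#   DT_EXT:         ("<provider>", "<id>.ext.disk0")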
9538 def _GetInstanceInfoText(instance):
9539 """Compute the text that should be added to the disk's metadata.
9542 return "originstname+%s" % instance.name
9545 def _CalcEta(time_taken, written, total_size):
9546 """Calculates the ETA based on size written and total size.
9548 @param time_taken: The time taken so far
9549 @param written: amount written so far
9550 @param total_size: The total size of data to be written
9551 @return: The remaining time in seconds
9554 avg_time = time_taken / float(written)
9555 return (total_size - written) * avg_time
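# Worked example (illustrative only): after 120 seconds with 1024 MiB out of
# 4096 MiB written, avg_time = 120 / 1024.0, about 0.117 s/MiB, so the ETA is
# (4096 - 1024) * 0.117, roughly 360 seconds, i.e. six more minutes at the
# observed average rate.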
9558 def _WipeDisks(lu, instance, disks=None):
9559 """Wipes instance disks.
9561 @type lu: L{LogicalUnit}
9562 @param lu: the logical unit on whose behalf we execute
9563 @type instance: L{objects.Instance}
9564 @param instance: the instance whose disks we should create
9565 @return: the success of the wipe
9568 node = instance.primary_node
9570 if disks is None:
9571 disks = [(idx, disk, 0)
9572 for (idx, disk) in enumerate(instance.disks)]
9574 for (_, device, _) in disks:
9575 lu.cfg.SetDiskID(device, node)
9577 logging.info("Pausing synchronization of disks of instance '%s'",
9578 instance.name)
9579 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9580 (map(compat.snd, disks),
9581 instance),
9582 True)
9583 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9585 for idx, success in enumerate(result.payload):
9586 if not success:
9587 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9588 " failed", idx, instance.name)
9590 try:
9591 for (idx, device, offset) in disks:
9592 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9593 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9594 wipe_chunk_size = \
9595 int(min(constants.MAX_WIPE_CHUNK,
9596 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9598 size = device.size
9599 last_output = 0
9600 start_time = time.time()
9602 if offset == 0:
9603 info_text = ""
9604 else:
9605 info_text = (" (from %s to %s)" %
9606 (utils.FormatUnit(offset, "h"),
9607 utils.FormatUnit(size, "h")))
9609 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9611 logging.info("Wiping disk %d for instance %s on node %s using"
9612 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9614 while offset < size:
9615 wipe_size = min(wipe_chunk_size, size - offset)
9617 logging.debug("Wiping disk %d, offset %s, chunk %s",
9618 idx, offset, wipe_size)
9620 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9621 wipe_size)
9622 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9623 (idx, offset, wipe_size))
9625 now = time.time()
9626 offset += wipe_size
9627 if now - last_output >= 60:
9628 eta = _CalcEta(now - start_time, offset, size)
9629 lu.LogInfo(" - done: %.1f%% ETA: %s",
9630 offset / float(size) * 100, utils.FormatSeconds(eta))
9631 last_output = now
9632 finally:
9633 logging.info("Resuming synchronization of disks for instance '%s'",
9634 instance.name)
9636 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9637 (map(compat.snd, disks),
9638 instance),
9639 False)
9641 if result.fail_msg:
9642 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9643 node, result.fail_msg)
9644 else:
9645 for idx, success in enumerate(result.payload):
9646 if not success:
9647 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9648 " failed", idx, instance.name)
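# Worked example (illustrative; assumes MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB): a 2048 MiB disk is wiped in chunks of
# int(min(1024, 2048 / 100.0 * 10)) = 204 MiB, while any disk larger than
# about 10 GiB is capped at the 1024 MiB maximum chunk size.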
9651 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9652 """Create all disks for an instance.
9654 This abstracts away some work from AddInstance.
9656 @type lu: L{LogicalUnit}
9657 @param lu: the logical unit on whose behalf we execute
9658 @type instance: L{objects.Instance}
9659 @param instance: the instance whose disks we should create
9661 @param to_skip: list of indices to skip
9662 @type target_node: string
9663 @param target_node: if passed, overrides the target node for creation
9665 @return: the success of the creation
9668 info = _GetInstanceInfoText(instance)
9669 if target_node is None:
9670 pnode = instance.primary_node
9671 all_nodes = instance.all_nodes
9672 else:
9673 pnode = target_node
9674 all_nodes = [pnode]
9676 if instance.disk_template in constants.DTS_FILEBASED:
9677 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9678 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9680 result.Raise("Failed to create directory '%s' on"
9681 " node %s" % (file_storage_dir, pnode))
9683 # Note: this needs to be kept in sync with adding of disks in
9684 # LUInstanceSetParams
9685 for idx, device in enumerate(instance.disks):
9686 if to_skip and idx in to_skip:
9687 continue
9688 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9690 for node in all_nodes:
9691 f_create = node == pnode
9692 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
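# Note (added commentary): in the loop above both force_create and force_open
# are the same "node == pnode" flag, so devices are always created and opened
# on the primary node, while secondary nodes only create the pieces that
# report CreateOnSecondary() as true (e.g. the LVs backing a DRBD device).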
9695 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9696 """Remove all disks for an instance.
9698 This abstracts away some work from `AddInstance()` and
9699 `RemoveInstance()`. Note that in case some of the devices couldn't
9700 be removed, the removal will continue with the other ones (compare
9701 with `_CreateDisks()`).
9703 @type lu: L{LogicalUnit}
9704 @param lu: the logical unit on whose behalf we execute
9705 @type instance: L{objects.Instance}
9706 @param instance: the instance whose disks we should remove
9707 @type target_node: string
9708 @param target_node: used to override the node on which to remove the disks
9710 @return: the success of the removal
9713 logging.info("Removing block devices for instance %s", instance.name)
9715 all_result = True
9716 ports_to_release = set()
9717 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9718 for (idx, device) in enumerate(anno_disks):
9719 if target_node:
9720 edata = [(target_node, device)]
9721 else:
9722 edata = device.ComputeNodeTree(instance.primary_node)
9723 for node, disk in edata:
9724 lu.cfg.SetDiskID(disk, node)
9725 result = lu.rpc.call_blockdev_remove(node, disk)
9726 if result.fail_msg:
9727 lu.LogWarning("Could not remove disk %s on node %s,"
9728 " continuing anyway: %s", idx, node, result.fail_msg)
9729 if not (result.offline and node != instance.primary_node):
9730 all_result = False
9732 # if this is a DRBD disk, return its port to the pool
9733 if device.dev_type in constants.LDS_DRBD:
9734 ports_to_release.add(device.logical_id[2])
9736 if all_result or ignore_failures:
9737 for port in ports_to_release:
9738 lu.cfg.AddTcpUdpPort(port)
9740 if instance.disk_template in constants.DTS_FILEBASED:
9741 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9742 if target_node:
9743 tgt = target_node
9744 else:
9745 tgt = instance.primary_node
9746 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9747 if result.fail_msg:
9748 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9749 file_storage_dir, instance.primary_node, result.fail_msg)
9750 all_result = False
9752 return all_result
9755 def _ComputeDiskSizePerVG(disk_template, disks):
9756 """Compute disk size requirements in the volume group
9759 def _compute(disks, payload):
9760 """Universal algorithm.
9763 vgs = {}
9764 for disk in disks:
9765 vgs[disk[constants.IDISK_VG]] = \
9766 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9768 return vgs
9770 # Required free disk space as a function of disk and swap space
9771 req_size_dict = {
9772 constants.DT_DISKLESS: {},
9773 constants.DT_PLAIN: _compute(disks, 0),
9774 # 128 MB are added for drbd metadata for each disk
9775 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9776 constants.DT_FILE: {},
9777 constants.DT_SHARED_FILE: {},
9778 }
9780 if disk_template not in req_size_dict:
9781 raise errors.ProgrammerError("Disk template '%s' size requirement"
9782 " is unknown" % disk_template)
9784 return req_size_dict[disk_template]
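# Worked example (illustrative; assumes constants.DRBD_META_SIZE = 128): two
# DRBD disks of 10240 MB each in volume group "xenvg" give
#   _compute(disks, constants.DRBD_META_SIZE)
#     == {"xenvg": (10240 + 128) + (10240 + 128)} == {"xenvg": 20736}
# whereas DT_PLAIN would yield {"xenvg": 20480} for the same disks.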
9787 def _FilterVmNodes(lu, nodenames):
9788 """Filters out non-vm_capable nodes from a list.
9790 @type lu: L{LogicalUnit}
9791 @param lu: the logical unit for which we check
9792 @type nodenames: list
9793 @param nodenames: the list of nodes on which we should check
9795 @return: the list of vm-capable nodes
9798 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9799 return [name for name in nodenames if name not in vm_nodes]
9802 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9803 """Hypervisor parameter validation.
9805 This function abstracts the hypervisor parameter validation to be
9806 used in both instance create and instance modify.
9808 @type lu: L{LogicalUnit}
9809 @param lu: the logical unit for which we check
9810 @type nodenames: list
9811 @param nodenames: the list of nodes on which we should check
9812 @type hvname: string
9813 @param hvname: the name of the hypervisor we should use
9814 @type hvparams: dict
9815 @param hvparams: the parameters which we need to check
9816 @raise errors.OpPrereqError: if the parameters are not valid
9819 nodenames = _FilterVmNodes(lu, nodenames)
9821 cluster = lu.cfg.GetClusterInfo()
9822 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9824 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9825 for node in nodenames:
9826 info = hvinfo[node]
9827 if info.offline:
9828 continue
9829 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9832 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9833 """OS parameters validation.
9835 @type lu: L{LogicalUnit}
9836 @param lu: the logical unit for which we check
9837 @type required: boolean
9838 @param required: whether the validation should fail if the OS is not
9839 found
9840 @type nodenames: list
9841 @param nodenames: the list of nodes on which we should check
9842 @type osname: string
9843 @param osname: the name of the OS we should use
9844 @type osparams: dict
9845 @param osparams: the parameters which we need to check
9846 @raise errors.OpPrereqError: if the parameters are not valid
9849 nodenames = _FilterVmNodes(lu, nodenames)
9850 result = lu.rpc.call_os_validate(nodenames, required, osname,
9851 [constants.OS_VALIDATE_PARAMETERS],
9852 osparams)
9853 for node, nres in result.items():
9854 # we don't check for offline cases since this should be run only
9855 # against the master node and/or an instance's nodes
9856 nres.Raise("OS Parameters validation failed on node %s" % node)
9857 if not nres.payload:
9858 lu.LogInfo("OS %s not found on node %s, validation skipped",
9859 osname, node)
9862 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9863 """Wrapper around IAReqInstanceAlloc.
9865 @param op: The instance opcode
9866 @param disks: The computed disks
9867 @param nics: The computed nics
9868 @param beparams: The fully filled beparams
9869 @param node_whitelist: List of nodes which should appear as online to the
9870 allocator (unless the node is already marked offline)
9872 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9875 spindle_use = beparams[constants.BE_SPINDLE_USE]
9876 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9877 disk_template=op.disk_template,
9878 tags=op.tags,
9879 os=op.os_type,
9880 vcpus=beparams[constants.BE_VCPUS],
9881 memory=beparams[constants.BE_MAXMEM],
9882 spindle_use=spindle_use,
9883 disks=disks,
9884 nics=[n.ToDict() for n in nics],
9885 hypervisor=op.hypervisor,
9886 node_whitelist=node_whitelist)
9889 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9890 """Computes the nics.
9892 @param op: The instance opcode
9893 @param cluster: Cluster configuration object
9894 @param default_ip: The default ip to assign
9895 @param cfg: An instance of the configuration object
9896 @param ec_id: Execution context ID
9898 @returns: The built-up NICs
9901 nics = []
9902 for nic in op.nics:
9903 nic_mode_req = nic.get(constants.INIC_MODE, None)
9904 nic_mode = nic_mode_req
9905 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9906 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9908 net = nic.get(constants.INIC_NETWORK, None)
9909 link = nic.get(constants.NIC_LINK, None)
9910 ip = nic.get(constants.INIC_IP, None)
9912 if net is None or net.lower() == constants.VALUE_NONE:
9913 net = None
9914 else:
9915 if nic_mode_req is not None or link is not None:
9916 raise errors.OpPrereqError("If network is given, no mode or link"
9917 " is allowed to be passed",
9918 errors.ECODE_INVAL)
9920 # ip validity checks
9921 if ip is None or ip.lower() == constants.VALUE_NONE:
9922 nic_ip = None
9923 elif ip.lower() == constants.VALUE_AUTO:
9924 if not op.name_check:
9925 raise errors.OpPrereqError("IP address set to auto but name checks"
9926 " have been skipped",
9927 errors.ECODE_INVAL)
9928 nic_ip = default_ip
9929 else:
9930 # We defer pool operations until later, so that the iallocator has
9931 # filled in the instance's node(s)
9932 if ip.lower() == constants.NIC_IP_POOL:
9933 if net is None:
9934 raise errors.OpPrereqError("if ip=pool, parameter network"
9935 " must be passed too",
9936 errors.ECODE_INVAL)
9938 elif not netutils.IPAddress.IsValid(ip):
9939 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9940 errors.ECODE_INVAL)
9942 nic_ip = ip
9944 # TODO: check the ip address for uniqueness
9945 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9946 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9947 errors.ECODE_INVAL)
9949 # MAC address verification
9950 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9951 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9952 mac = utils.NormalizeAndValidateMac(mac)
9954 try:
9955 # TODO: We need to factor this out
9956 cfg.ReserveMAC(mac, ec_id)
9957 except errors.ReservationError:
9958 raise errors.OpPrereqError("MAC address %s already in use"
9959 " in cluster" % mac,
9960 errors.ECODE_NOTUNIQUE)
9962 # Build nic parameters
9963 nicparams = {}
9964 if nic_mode_req:
9965 nicparams[constants.NIC_MODE] = nic_mode
9966 if link:
9967 nicparams[constants.NIC_LINK] = link
9969 check_params = cluster.SimpleFillNIC(nicparams)
9970 objects.NIC.CheckParameterSyntax(check_params)
9971 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9972 network=net, nicparams=nicparams))
9974 return nics
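# Illustrative sketch (hypothetical values): an opcode nic such as
#   {constants.INIC_IP: constants.VALUE_AUTO,
#    constants.INIC_MAC: constants.VALUE_AUTO}
# passes the checks above with nic_ip resolved to default_ip, keeps the
# "auto" MAC for later generation in CheckPrereq, and ends up as an
# objects.NIC whose nicparams fall back to the cluster defaults via
# SimpleFillNIC.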
9977 def _ComputeDisks(op, default_vg):
9978 """Computes the instance disks.
9980 @param op: The instance opcode
9981 @param default_vg: The default_vg to assume
9983 @return: The computed disks
9986 disks = []
9987 for disk in op.disks:
9988 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9989 if mode not in constants.DISK_ACCESS_SET:
9990 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9991 mode, errors.ECODE_INVAL)
9992 size = disk.get(constants.IDISK_SIZE, None)
9993 if size is None:
9994 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9995 try:
9996 size = int(size)
9997 except (TypeError, ValueError):
9998 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9999 errors.ECODE_INVAL)
10001 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10002 if ext_provider and op.disk_template != constants.DT_EXT:
10003 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10004 " disk template, not %s" %
10005 (constants.IDISK_PROVIDER, constants.DT_EXT,
10006 op.disk_template), errors.ECODE_INVAL)
10008 data_vg = disk.get(constants.IDISK_VG, default_vg)
10009 new_disk = {
10010 constants.IDISK_SIZE: size,
10011 constants.IDISK_MODE: mode,
10012 constants.IDISK_VG: data_vg,
10013 }
10015 if constants.IDISK_METAVG in disk:
10016 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10017 if constants.IDISK_ADOPT in disk:
10018 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10020 # For extstorage, demand the `provider' option and add any
10021 # additional parameters (ext-params) to the dict
10022 if op.disk_template == constants.DT_EXT:
10023 if ext_provider:
10024 new_disk[constants.IDISK_PROVIDER] = ext_provider
10025 for key in disk:
10026 if key not in constants.IDISK_PARAMS:
10027 new_disk[key] = disk[key]
10028 else:
10029 raise errors.OpPrereqError("Missing provider for template '%s'" %
10030 constants.DT_EXT, errors.ECODE_INVAL)
10032 disks.append(new_disk)
10034 return disks
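# Illustrative sketch (hypothetical values): an opcode disk of
#   {constants.IDISK_SIZE: 10240, constants.IDISK_VG: "xenvg"}
# is normalized above into
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# with metavg/adopt/provider keys copied over only when present.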
10037 def _ComputeFullBeParams(op, cluster):
10038 """Computes the full beparams.
10040 @param op: The instance opcode
10041 @param cluster: The cluster config object
10043 @return: The fully filled beparams
10046 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10047 for param, value in op.beparams.iteritems():
10048 if value == constants.VALUE_AUTO:
10049 op.beparams[param] = default_beparams[param]
10050 objects.UpgradeBeParams(op.beparams)
10051 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10052 return cluster.SimpleFillBE(op.beparams)
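# Illustrative sketch (hypothetical values): if the cluster default for
# constants.BE_MAXMEM is 1024 and the opcode passes
#   {constants.BE_MAXMEM: constants.VALUE_AUTO}
# the "auto" is first replaced by the default 1024, UpgradeBeParams then
# splits any legacy "memory" value into minmem/maxmem, and SimpleFillBE
# merges in the remaining cluster defaults to produce the full dict.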
10055 def _CheckOpportunisticLocking(op):
10056 """Generate error if opportunistic locking is not possible.
10059 if op.opportunistic_locking and not op.iallocator:
10060 raise errors.OpPrereqError("Opportunistic locking is only available in"
10061 " combination with an instance allocator",
10062 errors.ECODE_INVAL)
10065 class LUInstanceCreate(LogicalUnit):
10066 """Create an instance.
10069 HPATH = "instance-add"
10070 HTYPE = constants.HTYPE_INSTANCE
10071 REQ_BGL = False
10073 def CheckArguments(self):
10074 """Check arguments.
10077 # do not require name_check to ease forward/backward compatibility
10079 if self.op.no_install and self.op.start:
10080 self.LogInfo("No-installation mode selected, disabling startup")
10081 self.op.start = False
10082 # validate/normalize the instance name
10083 self.op.instance_name = \
10084 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10086 if self.op.ip_check and not self.op.name_check:
10087 # TODO: make the ip check more flexible and not depend on the name check
10088 raise errors.OpPrereqError("Cannot do IP address check without a name"
10089 " check", errors.ECODE_INVAL)
10091 # check nics' parameter names
10092 for nic in self.op.nics:
10093 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10095 # check disks. parameter names and consistent adopt/no-adopt strategy
10096 has_adopt = has_no_adopt = False
10097 for disk in self.op.disks:
10098 if self.op.disk_template != constants.DT_EXT:
10099 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10100 if constants.IDISK_ADOPT in disk:
10101 has_adopt = True
10102 else:
10103 has_no_adopt = True
10104 if has_adopt and has_no_adopt:
10105 raise errors.OpPrereqError("Either all disks are adopted or none is",
10106 errors.ECODE_INVAL)
10107 if has_adopt:
10108 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10109 raise errors.OpPrereqError("Disk adoption is not supported for the"
10110 " '%s' disk template" %
10111 self.op.disk_template,
10112 errors.ECODE_INVAL)
10113 if self.op.iallocator is not None:
10114 raise errors.OpPrereqError("Disk adoption not allowed with an"
10115 " iallocator script", errors.ECODE_INVAL)
10116 if self.op.mode == constants.INSTANCE_IMPORT:
10117 raise errors.OpPrereqError("Disk adoption not allowed for"
10118 " instance import", errors.ECODE_INVAL)
10119 else:
10120 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10121 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10122 " but no 'adopt' parameter given" %
10123 self.op.disk_template,
10124 errors.ECODE_INVAL)
10126 self.adopt_disks = has_adopt
10128 # instance name verification
10129 if self.op.name_check:
10130 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10131 self.op.instance_name = self.hostname1.name
10132 # used in CheckPrereq for ip ping check
10133 self.check_ip = self.hostname1.ip
10134 else:
10135 self.check_ip = None
10137 # file storage checks
10138 if (self.op.file_driver and
10139 self.op.file_driver not in constants.FILE_DRIVER):
10140 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10141 self.op.file_driver, errors.ECODE_INVAL)
10143 if self.op.disk_template == constants.DT_FILE:
10144 opcodes.RequireFileStorage()
10145 elif self.op.disk_template == constants.DT_SHARED_FILE:
10146 opcodes.RequireSharedFileStorage()
10148 ### Node/iallocator related checks
10149 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10151 if self.op.pnode is not None:
10152 if self.op.disk_template in constants.DTS_INT_MIRROR:
10153 if self.op.snode is None:
10154 raise errors.OpPrereqError("The networked disk templates need"
10155 " a mirror node", errors.ECODE_INVAL)
10156 elif self.op.snode:
10157 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10158 " template")
10159 self.op.snode = None
10161 _CheckOpportunisticLocking(self.op)
10163 self._cds = _GetClusterDomainSecret()
10165 if self.op.mode == constants.INSTANCE_IMPORT:
10166 # On import force_variant must be True, because if we forced it at
10167 # initial install, our only chance when importing it back is that it
10168 # works again!
10169 self.op.force_variant = True
10171 if self.op.no_install:
10172 self.LogInfo("No-installation mode has no effect during import")
10174 elif self.op.mode == constants.INSTANCE_CREATE:
10175 if self.op.os_type is None:
10176 raise errors.OpPrereqError("No guest OS specified",
10177 errors.ECODE_INVAL)
10178 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10179 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10180 " installation" % self.op.os_type,
10181 errors.ECODE_STATE)
10182 if self.op.disk_template is None:
10183 raise errors.OpPrereqError("No disk template specified",
10184 errors.ECODE_INVAL)
10186 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10187 # Check handshake to ensure both clusters have the same domain secret
10188 src_handshake = self.op.source_handshake
10189 if not src_handshake:
10190 raise errors.OpPrereqError("Missing source handshake",
10191 errors.ECODE_INVAL)
10193 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10194 src_handshake)
10195 if errmsg:
10196 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10197 errors.ECODE_INVAL)
10199 # Load and check source CA
10200 self.source_x509_ca_pem = self.op.source_x509_ca
10201 if not self.source_x509_ca_pem:
10202 raise errors.OpPrereqError("Missing source X509 CA",
10203 errors.ECODE_INVAL)
10205 try:
10206 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10207 self._cds)
10208 except OpenSSL.crypto.Error, err:
10209 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10210 (err, ), errors.ECODE_INVAL)
10212 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10213 if errcode is not None:
10214 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10215 errors.ECODE_INVAL)
10217 self.source_x509_ca = cert
10219 src_instance_name = self.op.source_instance_name
10220 if not src_instance_name:
10221 raise errors.OpPrereqError("Missing source instance name",
10222 errors.ECODE_INVAL)
10224 self.source_instance_name = \
10225 netutils.GetHostname(name=src_instance_name).name
10227 else:
10228 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10229 self.op.mode, errors.ECODE_INVAL)
10231 def ExpandNames(self):
10232 """ExpandNames for CreateInstance.
10234 Figure out the right locks for instance creation.
10237 self.needed_locks = {}
10239 instance_name = self.op.instance_name
10240 # this is just a preventive check, but someone might still add this
10241 # instance in the meantime, and creation will fail at lock-add time
10242 if instance_name in self.cfg.GetInstanceList():
10243 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10244 instance_name, errors.ECODE_EXISTS)
10246 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10248 if self.op.iallocator:
10249 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10250 # specifying a group on instance creation and then selecting nodes from
10251 # that group
10252 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10253 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10255 if self.op.opportunistic_locking:
10256 self.opportunistic_locks[locking.LEVEL_NODE] = True
10257 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10258 else:
10259 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10260 nodelist = [self.op.pnode]
10261 if self.op.snode is not None:
10262 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10263 nodelist.append(self.op.snode)
10264 self.needed_locks[locking.LEVEL_NODE] = nodelist
10266 # in case of import lock the source node too
10267 if self.op.mode == constants.INSTANCE_IMPORT:
10268 src_node = self.op.src_node
10269 src_path = self.op.src_path
10271 if src_path is None:
10272 self.op.src_path = src_path = self.op.instance_name
10274 if src_node is None:
10275 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10276 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10277 self.op.src_node = None
10278 if os.path.isabs(src_path):
10279 raise errors.OpPrereqError("Importing an instance from a path"
10280 " requires a source node option",
10281 errors.ECODE_INVAL)
10282 else:
10283 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10284 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10285 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10286 if not os.path.isabs(src_path):
10287 self.op.src_path = src_path = \
10288 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10290 self.needed_locks[locking.LEVEL_NODE_RES] = \
10291 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10293 def _RunAllocator(self):
10294 """Run the allocator based on input opcode.
10297 if self.op.opportunistic_locking:
10298 # Only consider nodes for which a lock is held
10299 node_whitelist = self.owned_locks(locking.LEVEL_NODE)
10300 else:
10301 node_whitelist = None
10303 #TODO Export network to iallocator so that it chooses a pnode
10304 # in a nodegroup that has the desired network connected to
10305 req = _CreateInstanceAllocRequest(self.op, self.disks,
10306 self.nics, self.be_full,
10307 node_whitelist)
10308 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10310 ial.Run(self.op.iallocator)
10312 if not ial.success:
10313 # When opportunistic locks are used only a temporary failure is generated
10314 if self.op.opportunistic_locking:
10315 ecode = errors.ECODE_TEMP_NORES
10316 else:
10317 ecode = errors.ECODE_NORES
10319 raise errors.OpPrereqError("Can't compute nodes using"
10320 " iallocator '%s': %s" %
10321 (self.op.iallocator, ial.info),
10322 ecode)
10324 self.op.pnode = ial.result[0]
10325 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10326 self.op.instance_name, self.op.iallocator,
10327 utils.CommaJoin(ial.result))
10329 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10331 if req.RequiredNodes() == 2:
10332 self.op.snode = ial.result[1]
10334 def BuildHooksEnv(self):
10335 """Build hooks env.
10337 This runs on master, primary and secondary nodes of the instance.
10340 env = {
10341 "ADD_MODE": self.op.mode,
10342 }
10343 if self.op.mode == constants.INSTANCE_IMPORT:
10344 env["SRC_NODE"] = self.op.src_node
10345 env["SRC_PATH"] = self.op.src_path
10346 env["SRC_IMAGES"] = self.src_images
10348 env.update(_BuildInstanceHookEnv(
10349 name=self.op.instance_name,
10350 primary_node=self.op.pnode,
10351 secondary_nodes=self.secondaries,
10352 status=self.op.start,
10353 os_type=self.op.os_type,
10354 minmem=self.be_full[constants.BE_MINMEM],
10355 maxmem=self.be_full[constants.BE_MAXMEM],
10356 vcpus=self.be_full[constants.BE_VCPUS],
10357 nics=_NICListToTuple(self, self.nics),
10358 disk_template=self.op.disk_template,
10359 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10360 for d in self.disks],
10361 bep=self.be_full,
10362 hvp=self.hv_full,
10363 hypervisor_name=self.op.hypervisor,
10364 tags=self.op.tags,
10365 ))
10367 return env
10369 def BuildHooksNodes(self):
10370 """Build hooks nodes.
10373 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10374 return (nl, nl)
10376 def _ReadExportInfo(self):
10377 """Reads the export information from disk.
10379 It will override the opcode source node and path with the actual
10380 information, if these two were not specified before.
10382 @return: the export information
10385 assert self.op.mode == constants.INSTANCE_IMPORT
10387 src_node = self.op.src_node
10388 src_path = self.op.src_path
10390 if src_node is None:
10391 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10392 exp_list = self.rpc.call_export_list(locked_nodes)
10393 found = False
10394 for node in exp_list:
10395 if exp_list[node].fail_msg:
10396 continue
10397 if src_path in exp_list[node].payload:
10398 found = True
10399 self.op.src_node = src_node = node
10400 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10401 src_path)
10402 break
10403 if not found:
10404 raise errors.OpPrereqError("No export found for relative path %s" %
10405 src_path, errors.ECODE_INVAL)
10407 _CheckNodeOnline(self, src_node)
10408 result = self.rpc.call_export_info(src_node, src_path)
10409 result.Raise("No export or invalid export found in dir %s" % src_path)
10411 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10412 if not export_info.has_section(constants.INISECT_EXP):
10413 raise errors.ProgrammerError("Corrupted export config",
10414 errors.ECODE_ENVIRON)
10416 ei_version = export_info.get(constants.INISECT_EXP, "version")
10417 if int(ei_version) != constants.EXPORT_VERSION:
10418 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10419 (ei_version, constants.EXPORT_VERSION),
10420 errors.ECODE_ENVIRON)
10422 return export_info
10423 def _ReadExportParams(self, einfo):
10424 """Use export parameters as defaults.
10426 In case the opcode doesn't specify (as in override) some instance
10427 parameters, then try to use them from the export information, if
10428 that declares them.
10431 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10433 if self.op.disk_template is None:
10434 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10435 self.op.disk_template = einfo.get(constants.INISECT_INS,
10436 "disk_template")
10437 if self.op.disk_template not in constants.DISK_TEMPLATES:
10438 raise errors.OpPrereqError("Disk template specified in configuration"
10439 " file is not one of the allowed values:"
10440 " %s" %
10441 " ".join(constants.DISK_TEMPLATES),
10442 errors.ECODE_INVAL)
10443 else:
10444 raise errors.OpPrereqError("No disk template specified and the export"
10445 " is missing the disk_template information",
10446 errors.ECODE_INVAL)
10448 if not self.op.disks:
10449 disks = []
10450 # TODO: import the disk iv_name too
10451 for idx in range(constants.MAX_DISKS):
10452 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10453 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10454 disks.append({constants.IDISK_SIZE: disk_sz})
10455 self.op.disks = disks
10456 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10457 raise errors.OpPrereqError("No disk info specified and the export"
10458 " is missing the disk information",
10459 errors.ECODE_INVAL)
10461 if not self.op.nics:
10462 nics = []
10463 for idx in range(constants.MAX_NICS):
10464 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10465 ndict = {}
10466 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10467 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10468 ndict[name] = v
10469 nics.append(ndict)
10470 else:
10471 break
10472 self.op.nics = nics
10474 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10475 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10477 if (self.op.hypervisor is None and
10478 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10479 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10481 if einfo.has_section(constants.INISECT_HYP):
10482 # use the export parameters but do not override the ones
10483 # specified by the user
10484 for name, value in einfo.items(constants.INISECT_HYP):
10485 if name not in self.op.hvparams:
10486 self.op.hvparams[name] = value
10488 if einfo.has_section(constants.INISECT_BEP):
10489 # use the parameters, without overriding
10490 for name, value in einfo.items(constants.INISECT_BEP):
10491 if name not in self.op.beparams:
10492 self.op.beparams[name] = value
10493 # Compatibility for the old "memory" be param
10494 if name == constants.BE_MEMORY:
10495 if constants.BE_MAXMEM not in self.op.beparams:
10496 self.op.beparams[constants.BE_MAXMEM] = value
10497 if constants.BE_MINMEM not in self.op.beparams:
10498 self.op.beparams[constants.BE_MINMEM] = value
10499 else:
10500 # try to read the parameters old style, from the main section
10501 for name in constants.BES_PARAMETERS:
10502 if (name not in self.op.beparams and
10503 einfo.has_option(constants.INISECT_INS, name)):
10504 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10506 if einfo.has_section(constants.INISECT_OSP):
10507 # use the parameters, without overriding
10508 for name, value in einfo.items(constants.INISECT_OSP):
10509 if name not in self.op.osparams:
10510 self.op.osparams[name] = value
10512 def _RevertToDefaults(self, cluster):
10513 """Revert the instance parameters to the default values.
10517 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10518 for name in self.op.hvparams.keys():
10519 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10520 del self.op.hvparams[name]
10522 be_defs = cluster.SimpleFillBE({})
10523 for name in self.op.beparams.keys():
10524 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10525 del self.op.beparams[name]
10527 nic_defs = cluster.SimpleFillNIC({})
10528 for nic in self.op.nics:
10529 for name in constants.NICS_PARAMETERS:
10530 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10531 del nic[name]
10533 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10534 for name in self.op.osparams.keys():
10535 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10536 del self.op.osparams[name]
10538 def _CalculateFileStorageDir(self):
10539 """Calculate final instance file storage dir.
10542 # file storage dir calculation/check
10543 self.instance_file_storage_dir = None
10544 if self.op.disk_template in constants.DTS_FILEBASED:
10545 # build the full file storage dir path
10546 joinargs = []
10548 if self.op.disk_template == constants.DT_SHARED_FILE:
10549 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10550 else:
10551 get_fsd_fn = self.cfg.GetFileStorageDir
10553 cfg_storagedir = get_fsd_fn()
10554 if not cfg_storagedir:
10555 raise errors.OpPrereqError("Cluster file storage dir not defined",
10556 errors.ECODE_STATE)
10557 joinargs.append(cfg_storagedir)
10559 if self.op.file_storage_dir is not None:
10560 joinargs.append(self.op.file_storage_dir)
10562 joinargs.append(self.op.instance_name)
10564 # pylint: disable=W0142
10565 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10567 def CheckPrereq(self): # pylint: disable=R0914
10568 """Check prerequisites.
10571 self._CalculateFileStorageDir()
10573 if self.op.mode == constants.INSTANCE_IMPORT:
10574 export_info = self._ReadExportInfo()
10575 self._ReadExportParams(export_info)
10576 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10577 else:
10578 self._old_instance_name = None
10580 if (not self.cfg.GetVGName() and
10581 self.op.disk_template not in constants.DTS_NOT_LVM):
10582 raise errors.OpPrereqError("Cluster does not support lvm-based"
10583 " instances", errors.ECODE_STATE)
10585 if (self.op.hypervisor is None or
10586 self.op.hypervisor == constants.VALUE_AUTO):
10587 self.op.hypervisor = self.cfg.GetHypervisorType()
10589 cluster = self.cfg.GetClusterInfo()
10590 enabled_hvs = cluster.enabled_hypervisors
10591 if self.op.hypervisor not in enabled_hvs:
10592 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10593 " cluster (%s)" %
10594 (self.op.hypervisor, ",".join(enabled_hvs)),
10595 errors.ECODE_STATE)
10597 # Check tag validity
10598 for tag in self.op.tags:
10599 objects.TaggableObject.ValidateTag(tag)
10601 # check hypervisor parameter syntax (locally)
10602 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10603 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10604 self.op.hvparams)
10605 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10606 hv_type.CheckParameterSyntax(filled_hvp)
10607 self.hv_full = filled_hvp
10608 # check that we don't specify global parameters on an instance
10609 _CheckGlobalHvParams(self.op.hvparams)
10611 # fill and remember the beparams dict
10612 self.be_full = _ComputeFullBeParams(self.op, cluster)
10614 # build os parameters
10615 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10617 # now that hvp/bep are in final format, let's reset to defaults,
10618 # if told to do so
10619 if self.op.identify_defaults:
10620 self._RevertToDefaults(cluster)
10623 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10624 self.proc.GetECId())
10626 # disk checks/pre-build
10627 default_vg = self.cfg.GetVGName()
10628 self.disks = _ComputeDisks(self.op, default_vg)
10630 if self.op.mode == constants.INSTANCE_IMPORT:
10631 disk_images = []
10632 for idx in range(len(self.disks)):
10633 option = "disk%d_dump" % idx
10634 if export_info.has_option(constants.INISECT_INS, option):
10635 # FIXME: are the old os-es, disk sizes, etc. useful?
10636 export_name = export_info.get(constants.INISECT_INS, option)
10637 image = utils.PathJoin(self.op.src_path, export_name)
10638 disk_images.append(image)
10639 else:
10640 disk_images.append(False)
10642 self.src_images = disk_images
10644 if self.op.instance_name == self._old_instance_name:
10645 for idx, nic in enumerate(self.nics):
10646 if nic.mac == constants.VALUE_AUTO:
10647 nic_mac_ini = "nic%d_mac" % idx
10648 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10650 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10652 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10653 if self.op.ip_check:
10654 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10655 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10656 (self.check_ip, self.op.instance_name),
10657 errors.ECODE_NOTUNIQUE)
10659 #### mac address generation
10660 # By generating here the mac address both the allocator and the hooks get
10661 # the real final mac address rather than the 'auto' or 'generate' value.
10662 # There is a race condition between the generation and the instance object
10663 # creation, which means that we know the mac is valid now, but we're not
10664 # sure it will be when we actually add the instance. If things go bad
10665 # adding the instance will abort because of a duplicate mac, and the
10666 # creation job will fail.
10667 for nic in self.nics:
10668 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10669 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10673 if self.op.iallocator is not None:
10674 self._RunAllocator()
10676 # Release all unneeded node locks
10677 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10678 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10679 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10680 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10682 assert (self.owned_locks(locking.LEVEL_NODE) ==
10683 self.owned_locks(locking.LEVEL_NODE_RES)), \
10684 "Node locks differ from node resource locks"
10686 #### node related checks
10688 # check primary node
10689 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10690 assert self.pnode is not None, \
10691 "Cannot retrieve locked node %s" % self.op.pnode
10692 if pnode.offline:
10693 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10694 pnode.name, errors.ECODE_STATE)
10695 if pnode.drained:
10696 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10697 pnode.name, errors.ECODE_STATE)
10698 if not pnode.vm_capable:
10699 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10700 " '%s'" % pnode.name, errors.ECODE_STATE)
10702 self.secondaries = []
10704 # Fill in any IPs from IP pools. This must happen here, because we need to
10705 # know the nic's primary node, as specified by the iallocator
10706 for idx, nic in enumerate(self.nics):
10707 net = nic.network
10708 if net is not None:
10709 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10710 if netparams is None:
10711 raise errors.OpPrereqError("No netparams found for network"
10712 " %s. Probably not connected to"
10713 " node's %s nodegroup" %
10714 (net, self.pnode.name),
10715 errors.ECODE_INVAL)
10716 self.LogInfo("NIC/%d inherits netparams %s" %
10717 (idx, netparams.values()))
10718 nic.nicparams = dict(netparams)
10719 if nic.ip is not None:
10720 if nic.ip.lower() == constants.NIC_IP_POOL:
10721 try:
10722 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10723 except errors.ReservationError:
10724 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10725 " from the address pool" % idx,
10726 errors.ECODE_STATE)
10727 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10728 else:
10729 try:
10730 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10731 except errors.ReservationError:
10732 raise errors.OpPrereqError("IP address %s already in use"
10733 " or does not belong to network %s" %
10734 (nic.ip, net),
10735 errors.ECODE_NOTUNIQUE)
10737 # net is None, ip None or given
10738 elif self.op.conflicts_check:
10739 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10741 # mirror node verification
10742 if self.op.disk_template in constants.DTS_INT_MIRROR:
10743 if self.op.snode == pnode.name:
10744 raise errors.OpPrereqError("The secondary node cannot be the"
10745 " primary node", errors.ECODE_INVAL)
10746 _CheckNodeOnline(self, self.op.snode)
10747 _CheckNodeNotDrained(self, self.op.snode)
10748 _CheckNodeVmCapable(self, self.op.snode)
10749 self.secondaries.append(self.op.snode)
10751 snode = self.cfg.GetNodeInfo(self.op.snode)
10752 if pnode.group != snode.group:
10753 self.LogWarning("The primary and secondary nodes are in two"
10754 " different node groups; the disk parameters"
10755 " from the first disk's node group will be"
10756 " used")
10758 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
10759 nodes = [pnode]
10760 if self.op.disk_template in constants.DTS_INT_MIRROR:
10761 nodes.append(snode)
10762 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10763 if compat.any(map(has_es, nodes)):
10764 raise errors.OpPrereqError("Disk template %s not supported with"
10765 " exclusive storage" % self.op.disk_template,
10766 errors.ECODE_STATE)
10768 nodenames = [pnode.name] + self.secondaries
10770 # Verify instance specs
10771 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10772 ispec = {
10773 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10774 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10775 constants.ISPEC_DISK_COUNT: len(self.disks),
10776 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10777 constants.ISPEC_NIC_COUNT: len(self.nics),
10778 constants.ISPEC_SPINDLE_USE: spindle_use,
10779 }
10781 group_info = self.cfg.GetNodeGroup(pnode.group)
10782 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10783 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10784 if not self.op.ignore_ipolicy and res:
10785 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10786 (pnode.group, group_info.name, utils.CommaJoin(res)))
10787 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10789 if not self.adopt_disks:
10790 if self.op.disk_template == constants.DT_RBD:
10791 # _CheckRADOSFreeSpace() is just a placeholder.
10792 # Any function that checks prerequisites can be placed here.
10793 # Check if there is enough space on the RADOS cluster.
10794 _CheckRADOSFreeSpace()
10795 elif self.op.disk_template == constants.DT_EXT:
10796 # FIXME: Function that checks prereqs if needed
10797 pass
10798 else:
10799 # Check lv size requirements, if not adopting
10800 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10801 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10803 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10804 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10805 disk[constants.IDISK_ADOPT])
10806 for disk in self.disks])
10807 if len(all_lvs) != len(self.disks):
10808 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10809 errors.ECODE_INVAL)
10810 for lv_name in all_lvs:
10811 try:
10812 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10813 # to ReserveLV use the same syntax
10814 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10815 except errors.ReservationError:
10816 raise errors.OpPrereqError("LV named %s used by another instance" %
10817 lv_name, errors.ECODE_NOTUNIQUE)
10819 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10820 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10822 node_lvs = self.rpc.call_lv_list([pnode.name],
10823 vg_names.payload.keys())[pnode.name]
10824 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10825 node_lvs = node_lvs.payload
10827 delta = all_lvs.difference(node_lvs.keys())
10828 if delta:
10829 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10830 utils.CommaJoin(delta),
10831 errors.ECODE_INVAL)
10832 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10833 if online_lvs:
10834 raise errors.OpPrereqError("Online logical volumes found, cannot"
10835 " adopt: %s" % utils.CommaJoin(online_lvs),
10836 errors.ECODE_STATE)
10837 # update the size of disk based on what is found
10838 for dsk in self.disks:
10839 dsk[constants.IDISK_SIZE] = \
10840 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10841 dsk[constants.IDISK_ADOPT])][0]))
10843 elif self.op.disk_template == constants.DT_BLOCK:
10844 # Normalize and de-duplicate device paths
10845 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10846 for disk in self.disks])
10847 if len(all_disks) != len(self.disks):
10848 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10849 errors.ECODE_INVAL)
10850 baddisks = [d for d in all_disks
10851 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10852 if baddisks:
10853 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10854 " cannot be adopted" %
10855 (utils.CommaJoin(baddisks),
10856 constants.ADOPTABLE_BLOCKDEV_ROOT),
10857 errors.ECODE_INVAL)
10859 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10860 list(all_disks))[pnode.name]
10861 node_disks.Raise("Cannot get block device information from node %s" %
10862 pnode.name)
10863 node_disks = node_disks.payload
10864 delta = all_disks.difference(node_disks.keys())
10865 if delta:
10866 raise errors.OpPrereqError("Missing block device(s): %s" %
10867 utils.CommaJoin(delta),
10868 errors.ECODE_INVAL)
10869 for dsk in self.disks:
10870 dsk[constants.IDISK_SIZE] = \
10871 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10873 # Verify instance specs
10874 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10875 ispec = {
10876 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10877 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10878 constants.ISPEC_DISK_COUNT: len(self.disks),
10879 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10880 for disk in self.disks],
10881 constants.ISPEC_NIC_COUNT: len(self.nics),
10882 constants.ISPEC_SPINDLE_USE: spindle_use,
10883 }
10885 group_info = self.cfg.GetNodeGroup(pnode.group)
10886 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10887 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10888 if not self.op.ignore_ipolicy and res:
10889 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10890 " policy: %s") % (pnode.group,
10891 utils.CommaJoin(res)),
10892 errors.ECODE_INVAL)
10894 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10896 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10897 # check OS parameters (remotely)
10898 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10900 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10902 #TODO: _CheckExtParams (remotely)
10903 # Check parameters for extstorage
10905 # memory check on primary node
10906 #TODO(dynmem): use MINMEM for checking
10907 if self.op.start:
10908 _CheckNodeFreeMemory(self, self.pnode.name,
10909 "creating instance %s" % self.op.instance_name,
10910 self.be_full[constants.BE_MAXMEM],
10911 self.op.hypervisor)
10913 self.dry_run_result = list(nodenames)
10915 def Exec(self, feedback_fn):
10916 """Create and add the instance to the cluster.
10919 instance = self.op.instance_name
10920 pnode_name = self.pnode.name
10922 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10923 self.owned_locks(locking.LEVEL_NODE)), \
10924 "Node locks differ from node resource locks"
10925 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10927 ht_kind = self.op.hypervisor
10928 if ht_kind in constants.HTS_REQ_PORT:
10929 network_port = self.cfg.AllocatePort()
10930 else:
10931 network_port = None
10933 # This is ugly but we got a chicken-egg problem here
10934 # We can only take the group disk parameters, as the instance
10935 # has no disks yet (we are generating them right here).
10936 node = self.cfg.GetNodeInfo(pnode_name)
10937 nodegroup = self.cfg.GetNodeGroup(node.group)
10938 disks = _GenerateDiskTemplate(self,
10939 self.op.disk_template,
10940 instance, pnode_name,
10941 self.secondaries,
10942 self.disks,
10943 self.instance_file_storage_dir,
10944 self.op.file_driver,
10945 0,
10946 feedback_fn,
10947 self.cfg.GetGroupDiskParams(nodegroup))
10949 iobj = objects.Instance(name=instance, os=self.op.os_type,
10950 primary_node=pnode_name,
10951 nics=self.nics, disks=disks,
10952 disk_template=self.op.disk_template,
10953 admin_state=constants.ADMINST_DOWN,
10954 network_port=network_port,
10955 beparams=self.op.beparams,
10956 hvparams=self.op.hvparams,
10957 hypervisor=self.op.hypervisor,
10958 osparams=self.op.osparams,
10959 )
10961 if self.op.tags:
10962 for tag in self.op.tags:
10963 iobj.AddTag(tag)
10965 if self.adopt_disks:
10966 if self.op.disk_template == constants.DT_PLAIN:
10967 # rename LVs to the newly-generated names; we need to construct
10968 # 'fake' LV disks with the old data, plus the new unique_id
10969 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10970 rename_to = []
10971 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10972 rename_to.append(t_dsk.logical_id)
10973 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10974 self.cfg.SetDiskID(t_dsk, pnode_name)
10975 result = self.rpc.call_blockdev_rename(pnode_name,
10976 zip(tmp_disks, rename_to))
10977 result.Raise("Failed to rename adopted LVs")
10978 else:
10979 feedback_fn("* creating instance disks...")
10980 try:
10981 _CreateDisks(self, iobj)
10982 except errors.OpExecError:
10983 self.LogWarning("Device creation failed, reverting...")
10985 _RemoveDisks(self, iobj)
10987 self.cfg.ReleaseDRBDMinors(instance)
10990 feedback_fn("adding instance %s to cluster config" % instance)
10992 self.cfg.AddInstance(iobj, self.proc.GetECId())
10994 # Declare that we don't want to remove the instance lock anymore, as we've
10995 # added the instance to the config
10996 del self.remove_locks[locking.LEVEL_INSTANCE]
10998 if self.op.mode == constants.INSTANCE_IMPORT:
10999 # Release unused nodes
11000 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
11002 # Release all nodes
11003 _ReleaseLocks(self, locking.LEVEL_NODE)
11006 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11007 feedback_fn("* wiping instance disks...")
11009 _WipeDisks(self, iobj)
11010 except errors.OpExecError, err:
11011 logging.exception("Wiping disks failed")
11012 self.LogWarning("Wiping instance disks failed (%s)", err)
11016 # Something is already wrong with the disks, don't do anything else
11018 elif self.op.wait_for_sync:
11019 disk_abort = not _WaitForSync(self, iobj)
11020 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11021 # make sure the disks are not degraded (still sync-ing is ok)
11022 feedback_fn("* checking mirrors status")
11023 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11028 _RemoveDisks(self, iobj)
11029 self.cfg.RemoveInstance(iobj.name)
11030 # Make sure the instance lock gets removed
11031 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11032 raise errors.OpExecError("There are some degraded disks for"
11035 # Release all node resource locks
11036 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11038 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11039 # we need to set the disks ID to the primary node, since the
10940 # preceding code might or might not have done it, depending on
11041 # disk template and other options
11042 for disk in iobj.disks:
11043 self.cfg.SetDiskID(disk, pnode_name)
11044 if self.op.mode == constants.INSTANCE_CREATE:
11045 if not self.op.no_install:
11046 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11047 not self.op.wait_for_sync)
11049 feedback_fn("* pausing disk sync to install instance OS")
11050 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11053 for idx, success in enumerate(result.payload):
11055 logging.warn("pause-sync of instance %s for disk %d failed",
11058 feedback_fn("* running the instance OS create scripts...")
11059 # FIXME: pass debug option from opcode to backend
11061 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11062 self.op.debug_level)
11064 feedback_fn("* resuming disk sync")
11065 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11068 for idx, success in enumerate(result.payload):
11070 logging.warn("resume-sync of instance %s for disk %d failed",
11073 os_add_result.Raise("Could not add os for instance %s"
11074 " on node %s" % (instance, pnode_name))
11077 if self.op.mode == constants.INSTANCE_IMPORT:
11078 feedback_fn("* running the instance OS import scripts...")
11082 for idx, image in enumerate(self.src_images):
11086 # FIXME: pass debug option from opcode to backend
11087 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11088 constants.IEIO_FILE, (image, ),
11089 constants.IEIO_SCRIPT,
11090 (iobj.disks[idx], idx),
11092 transfers.append(dt)
11095 masterd.instance.TransferInstanceData(self, feedback_fn,
11096 self.op.src_node, pnode_name,
11097 self.pnode.secondary_ip,
11099 if not compat.all(import_result):
11100 self.LogWarning("Some disks for instance %s on node %s were not"
11101 " imported successfully" % (instance, pnode_name))
11103 rename_from = self._old_instance_name
11105 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11106 feedback_fn("* preparing remote import...")
11107 # The source cluster will stop the instance before attempting to make
11108 # a connection. In some cases stopping an instance can take a long
11109 # time, hence the shutdown timeout is added to the connection timeout.
11111 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11112 self.op.source_shutdown_timeout)
11113 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11115 assert iobj.primary_node == self.pnode.name
11117 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11118 self.source_x509_ca,
11119 self._cds, timeouts)
11120 if not compat.all(disk_results):
11121 # TODO: Should the instance still be started, even if some disks
11122 # failed to import (valid for local imports, too)?
11123 self.LogWarning("Some disks for instance %s on node %s were not"
11124 " imported successfully" % (instance, pnode_name))
11126 rename_from = self.source_instance_name
11129 # also checked in the prereq part
11130 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11133 # Run rename script on newly imported instance
11134 assert iobj.name == instance
11135 feedback_fn("Running rename script for %s" % instance)
11136 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11138 self.op.debug_level)
11139 if result.fail_msg:
11140 self.LogWarning("Failed to run rename script for %s on node"
11141 " %s: %s" % (instance, pnode_name, result.fail_msg))
11143 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11146 iobj.admin_state = constants.ADMINST_UP
11147 self.cfg.Update(iobj, feedback_fn)
11148 logging.info("Starting instance %s on node %s", instance, pnode_name)
11149 feedback_fn("* starting instance...")
11150 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11152 result.Raise("Could not start instance")
11154 return list(iobj.all_nodes)
11157 class LUInstanceMultiAlloc(NoHooksLU):
11158 """Allocates multiple instances at the same time.
11163 def CheckArguments(self):
11164 """Check arguments.
11168 for inst in self.op.instances:
11169 if inst.iallocator is not None:
11170 raise errors.OpPrereqError("iallocator must not be set on"
11171 " individual instance objects", errors.ECODE_INVAL)
11172 nodes.append(bool(inst.pnode))
11173 if inst.disk_template in constants.DTS_INT_MIRROR:
11174 nodes.append(bool(inst.snode))
11176 has_nodes = compat.any(nodes)
11177 if compat.all(nodes) ^ has_nodes:
11178 raise errors.OpPrereqError("There are instance objects providing"
11179 " pnode/snode while others do not",
11180 errors.ECODE_INVAL)
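# Worked example of the all-or-none check above (illustrative values only):
#   nodes == [True, True]   -> all=True,  any=True  -> all ^ any == False: ok
#   nodes == [True, False]  -> all=False, any=True  -> all ^ any == True: error
#   nodes == [False, False] -> all=False, any=False -> all ^ any == False: ok
# i.e. either every instance specifies its nodes or none of them does.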
11182 if self.op.iallocator is None:
11183 default_iallocator = self.cfg.GetDefaultIAllocator()
11184 if default_iallocator and has_nodes:
11185 self.op.iallocator = default_iallocator
11187 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11188 " given and no cluster-wide default"
11189 " iallocator found; please specify either"
11190 " an iallocator or nodes on the instances"
11191 " or set a cluster-wide default iallocator",
11192 errors.ECODE_INVAL)
11194 _CheckOpportunisticLocking(self.op)
11196 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11198 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11199 utils.CommaJoin(dups), errors.ECODE_INVAL)
11201 def ExpandNames(self):
11202 """Calculate the locks.
11205 self.share_locks = _ShareAll()
11206 self.needed_locks = {
11207 # iallocator will select nodes and even if no iallocator is used,
11208 # collisions with LUInstanceCreate should be avoided
11209 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11212 if self.op.iallocator:
11213 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11214 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11216 if self.op.opportunistic_locking:
11217 self.opportunistic_locks[locking.LEVEL_NODE] = True
11218 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11221 for inst in self.op.instances:
11222 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11223 nodeslist.append(inst.pnode)
11224 if inst.snode is not None:
11225 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11226 nodeslist.append(inst.snode)
11228 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11229 # Lock resources of instance's primary and secondary nodes (copy to
11230 # prevent accidental modification)
11231 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11233 def CheckPrereq(self):
11234 """Check prerequisite.
11237 cluster = self.cfg.GetClusterInfo()
11238 default_vg = self.cfg.GetVGName()
11239 ec_id = self.proc.GetECId()
11241 if self.op.opportunistic_locking:
11242 # Only consider nodes for which a lock is held
11243 node_whitelist = self.owned_locks(locking.LEVEL_NODE)
11245 node_whitelist = None
11247 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11248 _ComputeNics(op, cluster, None,
11250 _ComputeFullBeParams(op, cluster),
11252 for op in self.op.instances]
11254 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11255 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11257 ial.Run(self.op.iallocator)
11259 if not ial.success:
11260 raise errors.OpPrereqError("Can't compute nodes using"
11261 " iallocator '%s': %s" %
11262 (self.op.iallocator, ial.info),
11263 errors.ECODE_NORES)
11265 self.ia_result = ial.result
11267 if self.op.dry_run:
11268 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11269 constants.JOB_IDS_KEY: [],
11272 def _ConstructPartialResult(self):
11273 """Contructs the partial result.
11276 (allocatable, failed) = self.ia_result
11278 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11279 map(compat.fst, allocatable),
11280 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11283 def Exec(self, feedback_fn):
11284 """Executes the opcode.
11287 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11288 (allocatable, failed) = self.ia_result
11291 for (name, nodes) in allocatable:
11292 op = op2inst.pop(name)
11295 (op.pnode, op.snode) = nodes
11297 (op.pnode,) = nodes
11301 missing = set(op2inst.keys()) - set(failed)
11302 assert not missing, \
11303 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11305 return ResultWithJobs(jobs, **self._ConstructPartialResult())
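# Illustrative sketch of the resulting payload (hypothetical values): with
# two allocatable instances and one failed one, the opcode result resembles
#   {
#     opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1", "inst2"],
#     opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst3"],
#     constants.JOB_IDS_KEY: [...],  # filled in when the jobs are submitted
#   }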
11308 def _CheckRADOSFreeSpace():
11309 """Compute disk size requirements inside the RADOS cluster.
11312 # For the RADOS cluster we assume there is always enough space.
11316 class LUInstanceConsole(NoHooksLU):
11317 """Connect to an instance's console.
11319 This is somewhat special in that it returns the command line that
11320 you need to run on the master node in order to connect to the console.
11326 def ExpandNames(self):
11327 self.share_locks = _ShareAll()
11328 self._ExpandAndLockInstance()
11330 def CheckPrereq(self):
11331 """Check prerequisites.
11333 This checks that the instance is in the cluster.
11336 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11337 assert self.instance is not None, \
11338 "Cannot retrieve locked instance %s" % self.op.instance_name
11339 _CheckNodeOnline(self, self.instance.primary_node)
11341 def Exec(self, feedback_fn):
11342 """Connect to the console of an instance
11345 instance = self.instance
11346 node = instance.primary_node
11348 node_insts = self.rpc.call_instance_list([node],
11349 [instance.hypervisor])[node]
11350 node_insts.Raise("Can't get node information from %s" % node)
11352 if instance.name not in node_insts.payload:
11353 if instance.admin_state == constants.ADMINST_UP:
11354 state = constants.INSTST_ERRORDOWN
11355 elif instance.admin_state == constants.ADMINST_DOWN:
11356 state = constants.INSTST_ADMINDOWN
11358 state = constants.INSTST_ADMINOFFLINE
11359 raise errors.OpExecError("Instance %s is not running (state %s)" %
11360 (instance.name, state))
11362 logging.debug("Connecting to console of %s on %s", instance.name, node)
11364 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11367 def _GetInstanceConsole(cluster, instance):
11368 """Returns console information for an instance.
11370 @type cluster: L{objects.Cluster}
11371 @type instance: L{objects.Instance}
11375 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11376 # beparams and hvparams are passed separately, to avoid editing the
11377 # instance and then saving the defaults in the instance itself.
11378 hvparams = cluster.FillHV(instance)
11379 beparams = cluster.FillBE(instance)
11380 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11382 assert console.instance == instance.name
11383 assert console.Validate()
11385 return console.ToDict()
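# Illustrative usage sketch (assumed call site, not from this module):
#   console_dict = _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
# The returned dict mirrors the objects.InstanceConsole fields (instance
# name, console kind and the hypervisor-specific connection details).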
11388 class LUInstanceReplaceDisks(LogicalUnit):
11389 """Replace the disks of an instance.
11392 HPATH = "mirrors-replace"
11393 HTYPE = constants.HTYPE_INSTANCE
11396 def CheckArguments(self):
11397 """Check arguments.
11400 remote_node = self.op.remote_node
11401 ialloc = self.op.iallocator
11402 if self.op.mode == constants.REPLACE_DISK_CHG:
11403 if remote_node is None and ialloc is None:
11404 raise errors.OpPrereqError("When changing the secondary either an"
11405 " iallocator script must be used or the"
11406 " new node given", errors.ECODE_INVAL)
11408 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11410 elif remote_node is not None or ialloc is not None:
11411 # Not replacing the secondary
11412 raise errors.OpPrereqError("The iallocator and new node options can"
11413 " only be used when changing the"
11414 " secondary node", errors.ECODE_INVAL)
11416 def ExpandNames(self):
11417 self._ExpandAndLockInstance()
11419 assert locking.LEVEL_NODE not in self.needed_locks
11420 assert locking.LEVEL_NODE_RES not in self.needed_locks
11421 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11423 assert self.op.iallocator is None or self.op.remote_node is None, \
11424 "Conflicting options"
11426 if self.op.remote_node is not None:
11427 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11429 # Warning: do not remove the locking of the new secondary here
11430 # unless DRBD8.AddChildren is changed to work in parallel;
11431 # currently it doesn't since parallel invocations of
11432 # FindUnusedMinor will conflict
11433 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11434 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11436 self.needed_locks[locking.LEVEL_NODE] = []
11437 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11439 if self.op.iallocator is not None:
11440 # iallocator will select a new node in the same group
11441 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11442 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11444 self.needed_locks[locking.LEVEL_NODE_RES] = []
11446 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11447 self.op.iallocator, self.op.remote_node,
11448 self.op.disks, self.op.early_release,
11449 self.op.ignore_ipolicy)
11451 self.tasklets = [self.replacer]
11453 def DeclareLocks(self, level):
11454 if level == locking.LEVEL_NODEGROUP:
11455 assert self.op.remote_node is None
11456 assert self.op.iallocator is not None
11457 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11459 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11460 # Lock all groups used by instance optimistically; this requires going
11461 # via the node before it's locked, requiring verification later on
11462 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11463 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11465 elif level == locking.LEVEL_NODE:
11466 if self.op.iallocator is not None:
11467 assert self.op.remote_node is None
11468 assert not self.needed_locks[locking.LEVEL_NODE]
11469 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11471 # Lock member nodes of all locked groups
11472 self.needed_locks[locking.LEVEL_NODE] = \
11474 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11475 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11477 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11479 self._LockInstancesNodes()
11481 elif level == locking.LEVEL_NODE_RES:
11483 self.needed_locks[locking.LEVEL_NODE_RES] = \
11484 self.needed_locks[locking.LEVEL_NODE]
11486 def BuildHooksEnv(self):
11487 """Build hooks env.
11489 This runs on the master, the primary and all the secondaries.
11492 instance = self.replacer.instance
11494 "MODE": self.op.mode,
11495 "NEW_SECONDARY": self.op.remote_node,
11496 "OLD_SECONDARY": instance.secondary_nodes[0],
11498 env.update(_BuildInstanceHookEnvByObject(self, instance))
11501 def BuildHooksNodes(self):
11502 """Build hooks nodes.
11505 instance = self.replacer.instance
11507 self.cfg.GetMasterNode(),
11508 instance.primary_node,
11510 if self.op.remote_node is not None:
11511 nl.append(self.op.remote_node)
11514 def CheckPrereq(self):
11515 """Check prerequisites.
11518 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11519 self.op.iallocator is None)
11521 # Verify if node group locks are still correct
11522 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11524 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11526 return LogicalUnit.CheckPrereq(self)
11529 class TLReplaceDisks(Tasklet):
11530 """Replaces disks for an instance.
11532 Note: Locking is not within the scope of this class.
11535 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11536 disks, early_release, ignore_ipolicy):
11537 """Initializes this class.
11540 Tasklet.__init__(self, lu)
11543 self.instance_name = instance_name
11545 self.iallocator_name = iallocator_name
11546 self.remote_node = remote_node
11548 self.early_release = early_release
11549 self.ignore_ipolicy = ignore_ipolicy
11552 self.instance = None
11553 self.new_node = None
11554 self.target_node = None
11555 self.other_node = None
11556 self.remote_node_info = None
11557 self.node_secondary_ip = None
11560 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11561 """Compute a new secondary node using an IAllocator.
11564 req = iallocator.IAReqRelocate(name=instance_name,
11565 relocate_from=list(relocate_from))
11566 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11568 ial.Run(iallocator_name)
11570 if not ial.success:
11571 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11572 " %s" % (iallocator_name, ial.info),
11573 errors.ECODE_NORES)
11575 remote_node_name = ial.result[0]
11577 lu.LogInfo("Selected new secondary for instance '%s': %s",
11578 instance_name, remote_node_name)
11580 return remote_node_name
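# Illustrative sketch (hypothetical names): relocating the secondary of
# "inst1" away from "node2" via the "hail" allocator:
#   new_sec = TLReplaceDisks._RunAllocator(lu, "hail", "inst1", ["node2"])
# On success this logs and returns the chosen node name; on failure an
# OpPrereqError with ECODE_NORES is raised, as shown above.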
11582 def _FindFaultyDisks(self, node_name):
11583 """Wrapper for L{_FindFaultyInstanceDisks}.
11586 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11589 def _CheckDisksActivated(self, instance):
11590 """Checks if the instance disks are activated.
11592 @param instance: The instance to check disks
11593 @return: True if they are activated, False otherwise
11596 nodes = instance.all_nodes
11598 for idx, dev in enumerate(instance.disks):
11600 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11601 self.cfg.SetDiskID(dev, node)
11603 result = _BlockdevFind(self, node, dev, instance)
11607 elif result.fail_msg or not result.payload:
11612 def CheckPrereq(self):
11613 """Check prerequisites.
11615 This checks that the instance is in the cluster.
11618 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11619 assert instance is not None, \
11620 "Cannot retrieve locked instance %s" % self.instance_name
11622 if instance.disk_template != constants.DT_DRBD8:
11623 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11624 " instances", errors.ECODE_INVAL)
11626 if len(instance.secondary_nodes) != 1:
11627 raise errors.OpPrereqError("The instance has a strange layout,"
11628 " expected one secondary but found %d" %
11629 len(instance.secondary_nodes),
11630 errors.ECODE_FAULT)
11632 instance = self.instance
11633 secondary_node = instance.secondary_nodes[0]
11635 if self.iallocator_name is None:
11636 remote_node = self.remote_node
11638 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11639 instance.name, instance.secondary_nodes)
11641 if remote_node is None:
11642 self.remote_node_info = None
11644 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11645 "Remote node '%s' is not locked" % remote_node
11647 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11648 assert self.remote_node_info is not None, \
11649 "Cannot retrieve locked node %s" % remote_node
11651 if remote_node == self.instance.primary_node:
11652 raise errors.OpPrereqError("The specified node is the primary node of"
11653 " the instance", errors.ECODE_INVAL)
11655 if remote_node == secondary_node:
11656 raise errors.OpPrereqError("The specified node is already the"
11657 " secondary node of the instance",
11658 errors.ECODE_INVAL)
11660 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11661 constants.REPLACE_DISK_CHG):
11662 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11663 errors.ECODE_INVAL)
11665 if self.mode == constants.REPLACE_DISK_AUTO:
11666 if not self._CheckDisksActivated(instance):
11667 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11668 " first" % self.instance_name,
11669 errors.ECODE_STATE)
11670 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11671 faulty_secondary = self._FindFaultyDisks(secondary_node)
11673 if faulty_primary and faulty_secondary:
11674 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11675 " one node and can not be repaired"
11676 " automatically" % self.instance_name,
11677 errors.ECODE_STATE)
11680 self.disks = faulty_primary
11681 self.target_node = instance.primary_node
11682 self.other_node = secondary_node
11683 check_nodes = [self.target_node, self.other_node]
11684 elif faulty_secondary:
11685 self.disks = faulty_secondary
11686 self.target_node = secondary_node
11687 self.other_node = instance.primary_node
11688 check_nodes = [self.target_node, self.other_node]
11694 # Non-automatic modes
11695 if self.mode == constants.REPLACE_DISK_PRI:
11696 self.target_node = instance.primary_node
11697 self.other_node = secondary_node
11698 check_nodes = [self.target_node, self.other_node]
11700 elif self.mode == constants.REPLACE_DISK_SEC:
11701 self.target_node = secondary_node
11702 self.other_node = instance.primary_node
11703 check_nodes = [self.target_node, self.other_node]
11705 elif self.mode == constants.REPLACE_DISK_CHG:
11706 self.new_node = remote_node
11707 self.other_node = instance.primary_node
11708 self.target_node = secondary_node
11709 check_nodes = [self.new_node, self.other_node]
11711 _CheckNodeNotDrained(self.lu, remote_node)
11712 _CheckNodeVmCapable(self.lu, remote_node)
11714 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11715 assert old_node_info is not None
11716 if old_node_info.offline and not self.early_release:
11717 # doesn't make sense to delay the release
11718 self.early_release = True
11719 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11720 " early-release mode", secondary_node)
11723 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11726 # If not specified, all disks should be replaced
11728 self.disks = range(len(self.instance.disks))
11730 # TODO: This is ugly, but right now we can't distinguish between internal
11731 # submitted opcode and external one. We should fix that.
11732 if self.remote_node_info:
11733 # We change the node, let's verify it still meets instance policy
11734 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11735 cluster = self.cfg.GetClusterInfo()
11736 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11738 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11739 ignore=self.ignore_ipolicy)
11741 for node in check_nodes:
11742 _CheckNodeOnline(self.lu, node)
11744 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11747 if node_name is not None)
11749 # Release unneeded node and node resource locks
11750 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11751 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11752 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11754 # Release any owned node group
11755 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11757 # Check whether disks are valid
11758 for disk_idx in self.disks:
11759 instance.FindDisk(disk_idx)
11761 # Get secondary node IP addresses
11762 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11763 in self.cfg.GetMultiNodeInfo(touched_nodes))
11765 def Exec(self, feedback_fn):
11766 """Execute disk replacement.
11768 This dispatches the disk replacement to the appropriate handler.
11772 # Verify owned locks before starting operation
11773 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11774 assert set(owned_nodes) == set(self.node_secondary_ip), \
11775 ("Incorrect node locks, owning %s, expected %s" %
11776 (owned_nodes, self.node_secondary_ip.keys()))
11777 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11778 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11779 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11781 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11782 assert list(owned_instances) == [self.instance_name], \
11783 "Instance '%s' not locked" % self.instance_name
11785 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11786 "Should not own any node group lock at this point"
11789 feedback_fn("No disks need replacement for instance '%s'" %
11790 self.instance.name)
11793 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11794 (utils.CommaJoin(self.disks), self.instance.name))
11795 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11796 feedback_fn("Current seconary node: %s" %
11797 utils.CommaJoin(self.instance.secondary_nodes))
11799 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11801 # Activate the instance disks if we're replacing them on a down instance
11803 _StartInstanceDisks(self.lu, self.instance, True)
11806 # Should we replace the secondary node?
11807 if self.new_node is not None:
11808 fn = self._ExecDrbd8Secondary
11810 fn = self._ExecDrbd8DiskOnly
11812 result = fn(feedback_fn)
11814 # Deactivate the instance disks if we're replacing them on a
11817 _SafeShutdownInstanceDisks(self.lu, self.instance)
11819 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11822 # Verify owned locks
11823 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11824 nodes = frozenset(self.node_secondary_ip)
11825 assert ((self.early_release and not owned_nodes) or
11826 (not self.early_release and not (set(owned_nodes) - nodes))), \
11827 ("Not owning the correct locks, early_release=%s, owned=%r,"
11828 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11832 def _CheckVolumeGroup(self, nodes):
11833 self.lu.LogInfo("Checking volume groups")
11835 vgname = self.cfg.GetVGName()
11837 # Make sure volume group exists on all involved nodes
11838 results = self.rpc.call_vg_list(nodes)
11840 raise errors.OpExecError("Can't list volume groups on the nodes")
11843 res = results[node]
11844 res.Raise("Error checking node %s" % node)
11845 if vgname not in res.payload:
11846 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11849 def _CheckDisksExistence(self, nodes):
11850 # Check disk existence
11851 for idx, dev in enumerate(self.instance.disks):
11852 if idx not in self.disks:
11856 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11857 self.cfg.SetDiskID(dev, node)
11859 result = _BlockdevFind(self, node, dev, self.instance)
11861 msg = result.fail_msg
11862 if msg or not result.payload:
11864 msg = "disk not found"
11865 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11868 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11869 for idx, dev in enumerate(self.instance.disks):
11870 if idx not in self.disks:
11873 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11876 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11877 on_primary, ldisk=ldisk):
11878 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11879 " replace disks for instance %s" %
11880 (node_name, self.instance.name))
11882 def _CreateNewStorage(self, node_name):
11883 """Create new storage on the primary or secondary node.
11885 This is only used for same-node replaces, not for changing the
11886 secondary node, hence we don't want to modify the existing disk.
11891 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11892 for idx, dev in enumerate(disks):
11893 if idx not in self.disks:
11896 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11898 self.cfg.SetDiskID(dev, node_name)
11900 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11901 names = _GenerateUniqueNames(self.lu, lv_names)
11903 (data_disk, meta_disk) = dev.children
11904 vg_data = data_disk.logical_id[0]
11905 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11906 logical_id=(vg_data, names[0]),
11907 params=data_disk.params)
11908 vg_meta = meta_disk.logical_id[0]
11909 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11910 size=constants.DRBD_META_SIZE,
11911 logical_id=(vg_meta, names[1]),
11912 params=meta_disk.params)
11914 new_lvs = [lv_data, lv_meta]
11915 old_lvs = [child.Copy() for child in dev.children]
11916 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11917 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11919 # we pass force_create=True to force the LVM creation
11920 for new_lv in new_lvs:
11921 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11922 _GetInstanceInfoText(self.instance), False,
11927 def _CheckDevices(self, node_name, iv_names):
11928 for name, (dev, _, _) in iv_names.iteritems():
11929 self.cfg.SetDiskID(dev, node_name)
11931 result = _BlockdevFind(self, node_name, dev, self.instance)
11933 msg = result.fail_msg
11934 if msg or not result.payload:
11936 msg = "disk not found"
11937 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11940 if result.payload.is_degraded:
11941 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11943 def _RemoveOldStorage(self, node_name, iv_names):
11944 for name, (_, old_lvs, _) in iv_names.iteritems():
11945 self.lu.LogInfo("Remove logical volumes for %s", name)
11948 self.cfg.SetDiskID(lv, node_name)
11950 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11952 self.lu.LogWarning("Can't remove old LV: %s", msg,
11953 hint="remove unused LVs manually")
11955 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11956 """Replace a disk on the primary or secondary for DRBD 8.
11958 The algorithm for replace is quite complicated:
11960 1. for each disk to be replaced:
11962 1. create new LVs on the target node with unique names
11963 1. detach old LVs from the drbd device
11964 1. rename old LVs to name_replaced.<time_t>
11965 1. rename new LVs to old LVs
11966 1. attach the new LVs (with the old names now) to the drbd device
11968 1. wait for sync across all devices
11970 1. for each modified disk:
11972 1. remove old LVs (which have the name name_replaced.<time_t>)
11974 Failures are not very well handled.
11979 # Step: check device activation
11980 self.lu.LogStep(1, steps_total, "Check device existence")
11981 self._CheckDisksExistence([self.other_node, self.target_node])
11982 self._CheckVolumeGroup([self.target_node, self.other_node])
11984 # Step: check other node consistency
11985 self.lu.LogStep(2, steps_total, "Check peer consistency")
11986 self._CheckDisksConsistency(self.other_node,
11987 self.other_node == self.instance.primary_node,
11990 # Step: create new storage
11991 self.lu.LogStep(3, steps_total, "Allocate new storage")
11992 iv_names = self._CreateNewStorage(self.target_node)
11994 # Step: for each lv, detach+rename*2+attach
11995 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11996 for dev, old_lvs, new_lvs in iv_names.itervalues():
11997 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11999 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
12001 result.Raise("Can't detach drbd from local storage on node"
12002 " %s for device %s" % (self.target_node, dev.iv_name))
12004 #cfg.Update(instance)
12006 # ok, we created the new LVs, so now we know we have the needed
12007 # storage; as such, we proceed on the target node to rename
12008 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12009 # using the assumption that logical_id == physical_id (which in
12010 # turn is the unique_id on that node)
12012 # FIXME(iustin): use a better name for the replaced LVs
12013 temp_suffix = int(time.time())
12014 ren_fn = lambda d, suff: (d.physical_id[0],
12015 d.physical_id[1] + "_replaced-%s" % suff)
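# Worked example (hypothetical LV): with temp_suffix == 1400000000 and an
# old LV whose physical_id is ("xenvg", "disk0_data"), ren_fn yields
#   ("xenvg", "disk0_data_replaced-1400000000")
# which is the temporary name the old LV keeps until it is removed.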
12017 # Build the rename list based on what LVs exist on the node
12018 rename_old_to_new = []
12019 for to_ren in old_lvs:
12020 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12021 if not result.fail_msg and result.payload:
12023 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12025 self.lu.LogInfo("Renaming the old LVs on the target node")
12026 result = self.rpc.call_blockdev_rename(self.target_node,
12028 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12030 # Now we rename the new LVs to the old LVs
12031 self.lu.LogInfo("Renaming the new LVs on the target node")
12032 rename_new_to_old = [(new, old.physical_id)
12033 for old, new in zip(old_lvs, new_lvs)]
12034 result = self.rpc.call_blockdev_rename(self.target_node,
12036 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12038 # Intermediate steps of in memory modifications
12039 for old, new in zip(old_lvs, new_lvs):
12040 new.logical_id = old.logical_id
12041 self.cfg.SetDiskID(new, self.target_node)
12043 # We need to modify old_lvs so that removal later removes the
12044 # right LVs, not the newly added ones; note that old_lvs is a copy here
12046 for disk in old_lvs:
12047 disk.logical_id = ren_fn(disk, temp_suffix)
12048 self.cfg.SetDiskID(disk, self.target_node)
12050 # Now that the new lvs have the old name, we can add them to the device
12051 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12052 result = self.rpc.call_blockdev_addchildren(self.target_node,
12053 (dev, self.instance), new_lvs)
12054 msg = result.fail_msg
12056 for new_lv in new_lvs:
12057 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12060 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12061 hint=("cleanup manually the unused logical"
12063 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12065 cstep = itertools.count(5)
12067 if self.early_release:
12068 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12069 self._RemoveOldStorage(self.target_node, iv_names)
12070 # TODO: Check if releasing locks early still makes sense
12071 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12073 # Release all resource locks except those used by the instance
12074 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12075 keep=self.node_secondary_ip.keys())
12077 # Release all node locks while waiting for sync
12078 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12080 # TODO: Can the instance lock be downgraded here? Take the optional disk
12081 # shutdown in the caller into consideration.
12084 # This can fail as the old devices are degraded and _WaitForSync
12085 # does a combined result over all disks, so we don't check its return value
12086 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12087 _WaitForSync(self.lu, self.instance)
12089 # Check all devices manually
12090 self._CheckDevices(self.instance.primary_node, iv_names)
12092 # Step: remove old storage
12093 if not self.early_release:
12094 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12095 self._RemoveOldStorage(self.target_node, iv_names)
12097 def _ExecDrbd8Secondary(self, feedback_fn):
12098 """Replace the secondary node for DRBD 8.
12100 The algorithm for replace is quite complicated:
12101 - for all disks of the instance:
12102 - create new LVs on the new node with same names
12103 - shutdown the drbd device on the old secondary
12104 - disconnect the drbd network on the primary
12105 - create the drbd device on the new secondary
12106 - network attach the drbd on the primary, using an artifice:
12107 the drbd code for Attach() will connect to the network if it
12108 finds a device which is connected to the good local disks but
12109 not network enabled
12110 - wait for sync across all devices
12111 - remove all disks from the old secondary
12113 Failures are not very well handled.
12118 pnode = self.instance.primary_node
12120 # Step: check device activation
12121 self.lu.LogStep(1, steps_total, "Check device existence")
12122 self._CheckDisksExistence([self.instance.primary_node])
12123 self._CheckVolumeGroup([self.instance.primary_node])
12125 # Step: check other node consistency
12126 self.lu.LogStep(2, steps_total, "Check peer consistency")
12127 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12129 # Step: create new storage
12130 self.lu.LogStep(3, steps_total, "Allocate new storage")
12131 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12132 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12133 for idx, dev in enumerate(disks):
12134 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12135 (self.new_node, idx))
12136 # we pass force_create=True to force LVM creation
12137 for new_lv in dev.children:
12138 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12139 True, _GetInstanceInfoText(self.instance), False,
12142 # Step 4: drbd minors and drbd setup changes
12143 # after this, we must manually remove the drbd minors on both the
12144 # error and the success paths
12145 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12146 minors = self.cfg.AllocateDRBDMinor([self.new_node
12147 for dev in self.instance.disks],
12148 self.instance.name)
12149 logging.debug("Allocated minors %r", minors)
12152 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12153 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12154 (self.new_node, idx))
12155 # create new devices on new_node; note that we create two IDs:
12156 # one without port, so the drbd will be activated without
12157 # networking information on the new node at this stage, and one
12158 # with network, for the later activation in step 4
12159 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12160 if self.instance.primary_node == o_node1:
12163 assert self.instance.primary_node == o_node2, "Three-node instance?"
12166 new_alone_id = (self.instance.primary_node, self.new_node, None,
12167 p_minor, new_minor, o_secret)
12168 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12169 p_minor, new_minor, o_secret)
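# Illustrative sketch (hypothetical values): with primary node "node1",
# new secondary "node3", o_port 11000, p_minor 0 and new_minor 5:
#   new_alone_id == ("node1", "node3", None,  0, 5, o_secret)
#   new_net_id   == ("node1", "node3", 11000, 0, 5, o_secret)
# i.e. the same DRBD8 6-tuple, with the port withheld until the final
# network attach below.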
12171 iv_names[idx] = (dev, dev.children, new_net_id)
12172 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12174 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12175 logical_id=new_alone_id,
12176 children=dev.children,
12179 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12182 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12184 _GetInstanceInfoText(self.instance), False,
12186 except errors.GenericError:
12187 self.cfg.ReleaseDRBDMinors(self.instance.name)
12190 # We have new devices, shutdown the drbd on the old secondary
12191 for idx, dev in enumerate(self.instance.disks):
12192 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12193 self.cfg.SetDiskID(dev, self.target_node)
12194 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12195 (dev, self.instance)).fail_msg
12197 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12198 "node: %s" % (idx, msg),
12199 hint=("Please cleanup this device manually as"
12200 " soon as possible"))
12202 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12203 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12204 self.instance.disks)[pnode]
12206 msg = result.fail_msg
12208 # detaches didn't succeed (unlikely)
12209 self.cfg.ReleaseDRBDMinors(self.instance.name)
12210 raise errors.OpExecError("Can't detach the disks from the network on"
12211 " old node: %s" % (msg,))
12213 # if we managed to detach at least one, we update all the disks of
12214 # the instance to point to the new secondary
12215 self.lu.LogInfo("Updating instance configuration")
12216 for dev, _, new_logical_id in iv_names.itervalues():
12217 dev.logical_id = new_logical_id
12218 self.cfg.SetDiskID(dev, self.instance.primary_node)
12220 self.cfg.Update(self.instance, feedback_fn)
12222 # Release all node locks (the configuration has been updated)
12223 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12225 # and now perform the drbd attach
12226 self.lu.LogInfo("Attaching primary drbds to new secondary"
12227 " (standalone => connected)")
12228 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12230 self.node_secondary_ip,
12231 (self.instance.disks, self.instance),
12232 self.instance.name,
12234 for to_node, to_result in result.items():
12235 msg = to_result.fail_msg
12237 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12239 hint=("please do a gnt-instance info to see the"
12240 " status of disks"))
12242 cstep = itertools.count(5)
12244 if self.early_release:
12245 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12246 self._RemoveOldStorage(self.target_node, iv_names)
12247 # TODO: Check if releasing locks early still makes sense
12248 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12250 # Release all resource locks except those used by the instance
12251 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12252 keep=self.node_secondary_ip.keys())
12254 # TODO: Can the instance lock be downgraded here? Take the optional disk
12255 # shutdown in the caller into consideration.
12258 # This can fail as the old devices are degraded and _WaitForSync
12259 # does a combined result over all disks, so we don't check its return value
12260 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12261 _WaitForSync(self.lu, self.instance)
12263 # Check all devices manually
12264 self._CheckDevices(self.instance.primary_node, iv_names)
12266 # Step: remove old storage
12267 if not self.early_release:
12268 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12269 self._RemoveOldStorage(self.target_node, iv_names)
12272 class LURepairNodeStorage(NoHooksLU):
12273 """Repairs the volume group on a node.
12278 def CheckArguments(self):
12279 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12281 storage_type = self.op.storage_type
12283 if (constants.SO_FIX_CONSISTENCY not in
12284 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12285 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12286 " repaired" % storage_type,
12287 errors.ECODE_INVAL)
12289 def ExpandNames(self):
12290 self.needed_locks = {
12291 locking.LEVEL_NODE: [self.op.node_name],
12294 def _CheckFaultyDisks(self, instance, node_name):
12295 """Ensure faulty disks abort the opcode or at least warn."""
12297 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12299 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12300 " node '%s'" % (instance.name, node_name),
12301 errors.ECODE_STATE)
12302 except errors.OpPrereqError, err:
12303 if self.op.ignore_consistency:
12304 self.LogWarning(str(err.args[0]))
12308 def CheckPrereq(self):
12309 """Check prerequisites.
12312 # Check whether any instance on this node has faulty disks
12313 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12314 if inst.admin_state != constants.ADMINST_UP:
12316 check_nodes = set(inst.all_nodes)
12317 check_nodes.discard(self.op.node_name)
12318 for inst_node_name in check_nodes:
12319 self._CheckFaultyDisks(inst, inst_node_name)
12321 def Exec(self, feedback_fn):
12322 feedback_fn("Repairing storage unit '%s' on %s ..." %
12323 (self.op.name, self.op.node_name))
12325 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12326 result = self.rpc.call_storage_execute(self.op.node_name,
12327 self.op.storage_type, st_args,
12329 constants.SO_FIX_CONSISTENCY)
12330 result.Raise("Failed to repair storage unit '%s' on %s" %
12331 (self.op.name, self.op.node_name))
12334 class LUNodeEvacuate(NoHooksLU):
12335 """Evacuates instances off a list of nodes.
12340 _MODE2IALLOCATOR = {
12341 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12342 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12343 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12345 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12346 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12347 constants.IALLOCATOR_NEVAC_MODES)
12349 def CheckArguments(self):
12350 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12352 def ExpandNames(self):
12353 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12355 if self.op.remote_node is not None:
12356 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12357 assert self.op.remote_node
12359 if self.op.remote_node == self.op.node_name:
12360 raise errors.OpPrereqError("Can not use evacuated node as a new"
12361 " secondary node", errors.ECODE_INVAL)
12363 if self.op.mode != constants.NODE_EVAC_SEC:
12364 raise errors.OpPrereqError("Without the use of an iallocator only"
12365 " secondary instances can be evacuated",
12366 errors.ECODE_INVAL)
12369 self.share_locks = _ShareAll()
12370 self.needed_locks = {
12371 locking.LEVEL_INSTANCE: [],
12372 locking.LEVEL_NODEGROUP: [],
12373 locking.LEVEL_NODE: [],
12376 # Determine nodes (via group) optimistically, needs verification once locks
12377 # have been acquired
12378 self.lock_nodes = self._DetermineNodes()
12380 def _DetermineNodes(self):
12381 """Gets the list of nodes to operate on.
12384 if self.op.remote_node is None:
12385 # Iallocator will choose any node(s) in the same group
12386 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12388 group_nodes = frozenset([self.op.remote_node])
12390 # Determine nodes to be locked
12391 return set([self.op.node_name]) | group_nodes
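# Illustrative sketch (hypothetical names): evacuating "node1" with no
# remote node given locks the evacuated node plus its whole group, e.g.
#   set(["node1"]) | frozenset(["node1", "node2", "node3"])
#     == set(["node1", "node2", "node3"])
# whereas an explicit remote node narrows it to set(["node1", "remote"]).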
12393 def _DetermineInstances(self):
12394 """Builds list of instances to operate on.
12397 assert self.op.mode in constants.NODE_EVAC_MODES
12399 if self.op.mode == constants.NODE_EVAC_PRI:
12400 # Primary instances only
12401 inst_fn = _GetNodePrimaryInstances
12402 assert self.op.remote_node is None, \
12403 "Evacuating primary instances requires iallocator"
12404 elif self.op.mode == constants.NODE_EVAC_SEC:
12405 # Secondary instances only
12406 inst_fn = _GetNodeSecondaryInstances
12409 assert self.op.mode == constants.NODE_EVAC_ALL
12410 inst_fn = _GetNodeInstances
12411 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12413 raise errors.OpPrereqError("Due to an issue with the iallocator"
12414 " interface it is not possible to evacuate"
12415 " all instances at once; specify explicitly"
12416 " whether to evacuate primary or secondary"
12418 errors.ECODE_INVAL)
12420 return inst_fn(self.cfg, self.op.node_name)
12422 def DeclareLocks(self, level):
12423 if level == locking.LEVEL_INSTANCE:
12424 # Lock instances optimistically, needs verification once node and group
12425 # locks have been acquired
12426 self.needed_locks[locking.LEVEL_INSTANCE] = \
12427 set(i.name for i in self._DetermineInstances())
12429 elif level == locking.LEVEL_NODEGROUP:
12430 # Lock node groups for all potential target nodes optimistically, needs
12431 # verification once nodes have been acquired
12432 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12433 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12435 elif level == locking.LEVEL_NODE:
12436 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12438 def CheckPrereq(self):
12440 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12441 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12442 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12444 need_nodes = self._DetermineNodes()
12446 if not owned_nodes.issuperset(need_nodes):
12447 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12448 " locks were acquired, current nodes are"
12449 " are '%s', used to be '%s'; retry the"
12451 (self.op.node_name,
12452 utils.CommaJoin(need_nodes),
12453 utils.CommaJoin(owned_nodes)),
12454 errors.ECODE_STATE)
12456 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12457 if owned_groups != wanted_groups:
12458 raise errors.OpExecError("Node groups changed since locks were acquired,"
12459 " current groups are '%s', used to be '%s';"
12460 " retry the operation" %
12461 (utils.CommaJoin(wanted_groups),
12462 utils.CommaJoin(owned_groups)))
12464 # Determine affected instances
12465 self.instances = self._DetermineInstances()
12466 self.instance_names = [i.name for i in self.instances]
12468 if set(self.instance_names) != owned_instances:
12469 raise errors.OpExecError("Instances on node '%s' changed since locks"
12470 " were acquired, current instances are '%s',"
12471 " used to be '%s'; retry the operation" %
12472 (self.op.node_name,
12473 utils.CommaJoin(self.instance_names),
12474 utils.CommaJoin(owned_instances)))
12476 if self.instance_names:
12477 self.LogInfo("Evacuating instances from node '%s': %s",
12479 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12481 self.LogInfo("No instances to evacuate from node '%s'",
12484 if self.op.remote_node is not None:
12485 for i in self.instances:
12486 if i.primary_node == self.op.remote_node:
12487 raise errors.OpPrereqError("Node %s is the primary node of"
12488 " instance %s, cannot use it as"
12490 (self.op.remote_node, i.name),
12491 errors.ECODE_INVAL)
12493 def Exec(self, feedback_fn):
12494 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12496 if not self.instance_names:
12497 # No instances to evacuate
12500 elif self.op.iallocator is not None:
12501 # TODO: Implement relocation to other group
12502 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12503 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12504 instances=list(self.instance_names))
12505 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12507 ial.Run(self.op.iallocator)
12509 if not ial.success:
12510 raise errors.OpPrereqError("Can't compute node evacuation using"
12511 " iallocator '%s': %s" %
12512 (self.op.iallocator, ial.info),
12513 errors.ECODE_NORES)
12515 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12517 elif self.op.remote_node is not None:
12518 assert self.op.mode == constants.NODE_EVAC_SEC
12520 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12521 remote_node=self.op.remote_node,
12523 mode=constants.REPLACE_DISK_CHG,
12524 early_release=self.op.early_release)]
12525 for instance_name in self.instance_names]
12528 raise errors.ProgrammerError("No iallocator or remote node")
12530 return ResultWithJobs(jobs)
12533 def _SetOpEarlyRelease(early_release, op):
12534 """Sets C{early_release} flag on opcodes if available.
12538 op.early_release = early_release
12539 except AttributeError:
12540 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12545 def _NodeEvacDest(use_nodes, group, nodes):
12546 """Returns group or nodes depending on caller's choice.
12550 return utils.CommaJoin(nodes)
12555 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12556 """Unpacks the result of change-group and node-evacuate iallocator requests.
12558 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12559 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12561 @type lu: L{LogicalUnit}
12562 @param lu: Logical unit instance
12563 @type alloc_result: tuple/list
12564 @param alloc_result: Result from iallocator
12565 @type early_release: bool
12566 @param early_release: Whether to release locks early if possible
12567 @type use_nodes: bool
12568 @param use_nodes: Whether to display node names instead of groups
12571 (moved, failed, jobs) = alloc_result
12574 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12575 for (name, reason) in failed)
12576 lu.LogWarning("Unable to evacuate instances %s", failreason)
12577 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12580 lu.LogInfo("Instances to be moved: %s",
12581 utils.CommaJoin("%s (to %s)" %
12582 (name, _NodeEvacDest(use_nodes, group, nodes))
12583 for (name, group, nodes) in moved))
12585 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12586 map(opcodes.OpCode.LoadOpCode, ops))
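# Illustrative sketch of an iallocator result this helper consumes
# (hypothetical values):
#   alloc_result = (
#     [("inst1", "group1", ["node3"])],      # moved: (name, group, nodes)
#     [],                                    # failed: (name, reason) pairs
#     [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]],  # jobs, as opcode dicts
#   )
# Each inner job list is deserialized with opcodes.OpCode.LoadOpCode and
# the early_release flag is applied wherever the opcode supports it.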
12590 def _DiskSizeInBytesToMebibytes(lu, size):
12591 """Converts a disk size in bytes to mebibytes.
12593 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12596 (mib, remainder) = divmod(size, 1024 * 1024)
12599 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12600 " to not overwrite existing data (%s bytes will not be"
12601 " wiped)", (1024 * 1024) - remainder)
12607 class LUInstanceGrowDisk(LogicalUnit):
12608 """Grow a disk of an instance.
12611 HPATH = "disk-grow"
12612 HTYPE = constants.HTYPE_INSTANCE
12615 def ExpandNames(self):
12616 self._ExpandAndLockInstance()
12617 self.needed_locks[locking.LEVEL_NODE] = []
12618 self.needed_locks[locking.LEVEL_NODE_RES] = []
12619 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12620 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12622 def DeclareLocks(self, level):
12623 if level == locking.LEVEL_NODE:
12624 self._LockInstancesNodes()
12625 elif level == locking.LEVEL_NODE_RES:
12627 self.needed_locks[locking.LEVEL_NODE_RES] = \
12628 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12630 def BuildHooksEnv(self):
12631 """Build hooks env.
12633 This runs on the master, the primary and all the secondaries.
12637 "DISK": self.op.disk,
12638 "AMOUNT": self.op.amount,
12639 "ABSOLUTE": self.op.absolute,
12641 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12644 def BuildHooksNodes(self):
12645 """Build hooks nodes.
12648 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12651 def CheckPrereq(self):
12652 """Check prerequisites.
12654 This checks that the instance is in the cluster.
12657 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12658 assert instance is not None, \
12659 "Cannot retrieve locked instance %s" % self.op.instance_name
12660 nodenames = list(instance.all_nodes)
12661 for node in nodenames:
12662 _CheckNodeOnline(self, node)
12664 self.instance = instance
12666 if instance.disk_template not in constants.DTS_GROWABLE:
12667 raise errors.OpPrereqError("Instance's disk layout does not support"
12668 " growing", errors.ECODE_INVAL)
12670 self.disk = instance.FindDisk(self.op.disk)
12672 if self.op.absolute:
12673 self.target = self.op.amount
12674 self.delta = self.target - self.disk.size
12676 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12677 "current disk size (%s)" %
12678 (utils.FormatUnit(self.target, "h"),
12679 utils.FormatUnit(self.disk.size, "h")),
12680 errors.ECODE_STATE)
12682 self.delta = self.op.amount
12683 self.target = self.disk.size + self.delta
12685 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12686 utils.FormatUnit(self.delta, "h"),
12687 errors.ECODE_INVAL)
12689 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
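# Worked example (illustrative sizes, in MiB): for a 10240 MiB disk,
#   op.absolute=True,  op.amount=20480  ->  target=20480, delta=10240
#   op.absolute=False, op.amount=10240  ->  delta=10240,  target=20480
# Shrink requests (negative deltas) are rejected above in both forms.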
12691 def _CheckDiskSpace(self, nodenames, req_vgspace):
12692 template = self.instance.disk_template
12693 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12694 # TODO: check the free disk space for file, when that feature will be
12696 nodes = map(self.cfg.GetNodeInfo, nodenames)
12697 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12700 # With exclusive storage we need to do something smarter than just looking
12701 # at free space; for now, let's simply abort the operation.
12702 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12703 " is enabled", errors.ECODE_STATE)
12704 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
        if instance.admin_state != constants.ADMINST_UP:
          _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None
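
    # The returned value flattens the device status into a tuple, e.g.
    # (illustrative values) ("/dev/drbd0", 147, 0, 90.5, 4, False,
    # constants.LDS_OKAY) for a DRBD device that is 90.5% synced.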
    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; if C{None}, the field will be C{None}

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
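
# Minimal illustrative example (values assumed, not from the source):
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
#   => [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# With private_fn=_InstNicModPrivate, each tuple instead carries a fresh
# private object through which the prepare/apply callbacks can share state.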


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
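
# A callback may therefore return None (no change details) or a list of
# two-item tuples such as [("disk/0", "add:size=1024,mode=rw")] (values
# illustrative); both satisfy the check above.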


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
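
# Minimal usage sketch (assumed values; real callers pass the LU's disk/NIC
# callbacks instead of None):
#   container = ["first", "second"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("item", container, chgdesc, mods, None, None, None)
#   # container == ["second"], chgdesc == [("item/0", "remove")]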


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
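
  # Illustrative upgrade of the legacy two-element syntax (params assumed):
  #   [("add", {...}), ("2", {"mode": "ro"})]
  # becomes the indexed three-element syntax
  #   [(constants.DDM_ADD, -1, {...}),
  #    (constants.DDM_MODIFY, "2", {"mode": "ro"})]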

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)
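
  # E.g. (illustrative) an add request arriving as
  # (constants.DDM_ADD, -1, {"size": "1024"}) has its size normalized to the
  # integer 1024 (MiB) and its mode defaulted to constants.DISK_RDWR.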

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If a network is given, mode or link"
                                     " should not be set",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group in shared mode to be able to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(_NICToTuple(self, n))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):

    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    elif new_mode == constants.NIC_MODE_OVS:
      # TODO: check OVS link
      self.LogInfo("OVS links are currently not checked for correctness")

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:

      def get_net_prefix(net):
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())

    # if there is a change in the NIC/network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        if new_net:
          if new_ip.lower() == constants.NIC_IP_POOL:
            try:
              new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP"
                                         " from the address pool",
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
            params[constants.INIC_IP] = new_ip
          elif new_ip != old_ip or new_net != old_net:
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
        elif new_ip.lower() == constants.NIC_IP_POOL:
          raise errors.OpPrereqError("ip=pool, but no network found",
                                     errors.ECODE_INVAL)

        # the new network is None, so check for a conflicting IP instead
        elif self.op.conflicts_check:
          _CheckForConflictingIp(self, new_ip, pnode)

      # release the old IP if the old network is set
      if old_ip and old_net:
        try:
          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net)

    # there are no changes in the (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def _PreCheckDiskTemplate(self, pnode_info):
    """CheckPrereq checks related to a new disk template."""
    # Arguments are passed to avoid configuration lookups
    instance = self.instance
    pnode = instance.primary_node
    cluster = self.cluster
    if instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 instance.disk_template, errors.ECODE_INVAL)

    if (instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node == pnode:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      _CheckNodeOnline(self, self.op.remote_node)
      _CheckNodeNotDrained(self, self.op.remote_node)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      # Make sure none of the nodes require exclusive storage
      nodes = [pnode_info]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        assert snode_info
        nodes.append(snode_info)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
                  " storage is enabled" % (instance.disk_template,
                                           self.op.disk_template))
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Check disk modifications. This is done here and not in CheckArguments
    # (as with NICs), because we need to know the instance's disk template
    if instance.disk_template == constants.DT_EXT:
      self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # Check the validity of the `provider' parameter
    if instance.disk_template == constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and parameter"
                                       " '%s' missing, during disk add" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      self._PreCheckDiskTemplate(pnode_info)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)
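
    # Example (illustrative): vcpus=2 with cpu_mask "1:3" parses to
    # [[1], [3]], matching the vCPU count and requiring at least four
    # physical CPUs (IDs 0-3); a single-entry mask such as "0-1" applies
    # to all vCPUs and skips the length check.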

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor], False)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)
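
      # Worked example (illustrative): ballooning from current_memory=1024
      # to runtime_mem=2048 gives delta=1024, so the primary node must have
      # at least 1024 MB free; shrinking (delta <= 0) skips the check.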

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None and self.op.offline:
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
                          msg="can't change to offline")

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
    s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True, p_excl_stor)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    net = params.get(constants.INIC_NETWORK, None)
    #TODO: not private.filled?? can a nic have no nicparams??
    nicparams = private.filled

    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK],
        net)),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled

      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
14418 class _ExportQuery(_QueryBase):
14419 FIELDS = query.EXPORT_FIELDS
14421 #: The node name is not a unique key for this query
14422 SORT_FIELD = "node"
14424 def ExpandNames(self, lu):
14425 lu.needed_locks = {}
14427 # The following variables interact with _QueryBase._GetNames
14429 self.wanted = _GetWantedNodes(lu, self.names)
14431 self.wanted = locking.ALL_SET
14433 self.do_locking = self.use_locking
14435 if self.do_locking:
14436 lu.share_locks = _ShareAll()
14437 lu.needed_locks = {
14438 locking.LEVEL_NODE: self.wanted,
14442 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14444 def DeclareLocks(self, lu, level):
14447 def _GetQueryData(self, lu):
14448 """Computes the list of nodes and their attributes.
14451 # Locking is not used
14453 assert not (compat.any(lu.glm.is_owned(level)
14454 for level in locking.LEVELS
14455 if level != locking.LEVEL_CLUSTER) or
14456 self.do_locking or self.use_locking)
14458 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14462 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14464 result.append((node, None))
14466 result.extend((node, expname) for expname in nres.payload)
14471 class LUBackupPrepare(NoHooksLU):
14472 """Prepares an instance for an export and returns useful information.
14477 def ExpandNames(self):
14478 self._ExpandAndLockInstance()
14480 def CheckPrereq(self):
14481 """Check prerequisites.
14484 instance_name = self.op.instance_name
14486 self.instance = self.cfg.GetInstanceInfo(instance_name)
14487 assert self.instance is not None, \
14488 "Cannot retrieve locked instance %s" % self.op.instance_name
14489 _CheckNodeOnline(self, self.instance.primary_node)
14491 self._cds = _GetClusterDomainSecret()
14493 def Exec(self, feedback_fn):
14494 """Prepares an instance for an export.
14497 instance = self.instance
14499 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14500 salt = utils.GenerateSecret(8)
14502 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14503 result = self.rpc.call_x509_cert_create(instance.primary_node,
14504 constants.RIE_CERT_VALIDITY)
14505 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14507 (name, cert_pem) = result.payload
14509 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14513 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14514 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14516 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14522 class LUBackupExport(LogicalUnit):
14523 """Export an instance to an image in the cluster.
14526 HPATH = "instance-export"
14527 HTYPE = constants.HTYPE_INSTANCE
14530 def CheckArguments(self):
14531 """Check the arguments.
14534 self.x509_key_name = self.op.x509_key_name
14535 self.dest_x509_ca_pem = self.op.destination_x509_ca
14537 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14538 if not self.x509_key_name:
14539 raise errors.OpPrereqError("Missing X509 key name for encryption",
14540 errors.ECODE_INVAL)
14542 if not self.dest_x509_ca_pem:
14543 raise errors.OpPrereqError("Missing destination X509 CA",
14544 errors.ECODE_INVAL)
14546 def ExpandNames(self):
14547 self._ExpandAndLockInstance()
14549 # Lock all nodes for local exports
14550 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14551 # FIXME: lock only instance primary and destination node
14553 # Sad but true, for now we have to lock all nodes, as we don't know where
14554 # the previous export might be, and in this LU we search for it and
14555 # remove it from its current node. In the future we could fix this by:
14556 # - making a tasklet to search (share-lock all), then create the
14557 # new one, then a tasklet to remove the old export afterwards
14558 # - removing the removal operation altogether
14559 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14561 # Allocations should be stopped while this LU runs with node locks, but
14562 # the node allocation lock doesn't have to be held exclusively
14563 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14564 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14566 def DeclareLocks(self, level):
14567 """Last minute lock declaration."""
14568 # All nodes are locked anyway, so nothing to do here.
14570 def BuildHooksEnv(self):
14571 """Build hooks env.
14573 This will run on the master, primary node and target node.
14577 "EXPORT_MODE": self.op.mode,
14578 "EXPORT_NODE": self.op.target_node,
14579 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14580 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14581 # TODO: Generic function for boolean env variables
14582 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14585 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14589 def BuildHooksNodes(self):
14590 """Build hooks nodes.
14593 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14595 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14596 nl.append(self.op.target_node)
14600 def CheckPrereq(self):
14601 """Check prerequisites.
14603 This checks that the instance and node names are valid.
14606 instance_name = self.op.instance_name
14608 self.instance = self.cfg.GetInstanceInfo(instance_name)
14609 assert self.instance is not None, \
14610 "Cannot retrieve locked instance %s" % self.op.instance_name
14611 _CheckNodeOnline(self, self.instance.primary_node)
14613 if (self.op.remove_instance and
14614 self.instance.admin_state == constants.ADMINST_UP and
14615 not self.op.shutdown):
14616 raise errors.OpPrereqError("Can not remove instance without shutting it"
14617 " down before", errors.ECODE_STATE)
14619 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14620 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14621 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14622 assert self.dst_node is not None
14624 _CheckNodeOnline(self, self.dst_node.name)
14625 _CheckNodeNotDrained(self, self.dst_node.name)
14628 self.dest_disk_info = None
14629 self.dest_x509_ca = None
14631 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14632 self.dst_node = None
14634 if len(self.op.target_node) != len(self.instance.disks):
14635 raise errors.OpPrereqError(("Received destination information for %s"
14636 " disks, but instance %s has %s disks") %
14637 (len(self.op.target_node), instance_name,
14638 len(self.instance.disks)),
14639 errors.ECODE_INVAL)
14641 cds = _GetClusterDomainSecret()
14643 # Check X509 key name
14645 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14646 except (TypeError, ValueError), err:
14647 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14648 errors.ECODE_INVAL)
14650 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14651 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14652 errors.ECODE_INVAL)
14654 # Load and verify CA
14656 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14657 except OpenSSL.crypto.Error, err:
14658 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14659 (err, ), errors.ECODE_INVAL)
14661 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14662 if errcode is not None:
14663 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14664 (msg, ), errors.ECODE_INVAL)
14666 self.dest_x509_ca = cert
14668 # Verify target information
14670 for idx, disk_data in enumerate(self.op.target_node):
14672 (host, port, magic) = \
14673 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14674 except errors.GenericError, err:
14675 raise errors.OpPrereqError("Target info for disk %s: %s" %
14676 (idx, err), errors.ECODE_INVAL)
14678 disk_info.append((host, port, magic))
14680 assert len(disk_info) == len(self.op.target_node)
14681 self.dest_disk_info = disk_info
14684 raise errors.ProgrammerError("Unhandled export mode %r" %
14687 # instance disk type verification
14688 # TODO: Implement export support for file-based disks
14689 for disk in self.instance.disks:
14690 if disk.dev_type == constants.LD_FILE:
14691 raise errors.OpPrereqError("Export not supported for instances with"
14692 " file-based disks", errors.ECODE_INVAL)
14694 def _CleanupExports(self, feedback_fn):
14695 """Removes exports of current instance from all other nodes.
14697 If an instance in a cluster with nodes A..D was exported to node C, its
14698 exports will be removed from the nodes A, B and D.
14701 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14703 nodelist = self.cfg.GetNodeList()
14704 nodelist.remove(self.dst_node.name)
14706 # on one-node clusters nodelist will be empty after the removal;
14707 # if we proceeded, the backup would be removed because OpBackupQuery
14708 # substitutes an empty list with the full cluster node list.
14709 iname = self.instance.name
14711 feedback_fn("Removing old exports for instance %s" % iname)
14712 exportlist = self.rpc.call_export_list(nodelist)
14713 for node in exportlist:
14714 if exportlist[node].fail_msg:
14716 if iname in exportlist[node].payload:
14717 msg = self.rpc.call_export_remove(node, iname).fail_msg
14719 self.LogWarning("Could not remove older export for instance %s"
14720 " on node %s: %s", iname, node, msg)
14722 def Exec(self, feedback_fn):
14723 """Export an instance to an image in the cluster.
14726 assert self.op.mode in constants.EXPORT_MODES
14728 instance = self.instance
14729 src_node = instance.primary_node
14731 if self.op.shutdown:
14732 # shutdown the instance, but not the disks
14733 feedback_fn("Shutting down instance %s" % instance.name)
14734 result = self.rpc.call_instance_shutdown(src_node, instance,
14735 self.op.shutdown_timeout)
14736 # TODO: Maybe ignore failures if ignore_remove_failures is set
14737 result.Raise("Could not shutdown instance %s on"
14738 " node %s" % (instance.name, src_node))
14740 # set the disk IDs correctly since call_instance_start needs the
14741 # correct drbd minor to create the symlinks
14742 for disk in instance.disks:
14743 self.cfg.SetDiskID(disk, src_node)
14745 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14748 # Activate the instance disks if we're exporting a stopped instance
14749 feedback_fn("Activating disks for %s" % instance.name)
14750 _StartInstanceDisks(self, instance, None)
14753 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14756 helper.CreateSnapshots()
14758 if (self.op.shutdown and
14759 instance.admin_state == constants.ADMINST_UP and
14760 not self.op.remove_instance):
14761 assert not activate_disks
14762 feedback_fn("Starting instance %s" % instance.name)
14763 result = self.rpc.call_instance_start(src_node,
14764 (instance, None, None), False)
14765 msg = result.fail_msg
14767 feedback_fn("Failed to start instance: %s" % msg)
14768 _ShutdownInstanceDisks(self, instance)
14769 raise errors.OpExecError("Could not start instance: %s" % msg)
14771 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14772 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14773 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14774 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14775 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14777 (key_name, _, _) = self.x509_key_name
14780 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14783 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14784 key_name, dest_ca_pem,
14789 # Check for backwards compatibility
14790 assert len(dresults) == len(instance.disks)
14791 assert compat.all(isinstance(i, bool) for i in dresults), \
14792 "Not all results are boolean: %r" % dresults
14796 feedback_fn("Deactivating disks for %s" % instance.name)
14797 _ShutdownInstanceDisks(self, instance)
14799 if not (compat.all(dresults) and fin_resu):
14802 failures.append("export finalization")
14803 if not compat.all(dresults):
14804 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14806 failures.append("disk export: disk(s) %s" % fdsk)
14808 raise errors.OpExecError("Export failed, errors in %s" %
14809 utils.CommaJoin(failures))
14811 # At this point the export was successful; we can clean up and finish
14813 # Remove instance if requested
14814 if self.op.remove_instance:
14815 feedback_fn("Removing instance %s" % instance.name)
14816 _RemoveInstance(self, feedback_fn, instance,
14817 self.op.ignore_remove_failures)
14819 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14820 self._CleanupExports(feedback_fn)
14822 return fin_resu, dresults
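# Illustrative return value of the method above: "fin_resu" is the overall
# finalization status and "dresults" holds one boolean per instance disk,
# e.g. (True, [True, True]) for a fully successful two-disk export.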
14825 class LUBackupRemove(NoHooksLU):
14826 """Remove exports related to the named instance.
14831 def ExpandNames(self):
14832 self.needed_locks = {
14833 # We need all nodes to be locked in order for RemoveExport to work, but
14834 # we don't need to lock the instance itself, as nothing will happen to it
14835 # (and we can also remove exports for an already-removed instance)
14836 locking.LEVEL_NODE: locking.ALL_SET,
14838 # Removing backups is quick, so blocking allocations is justified
14839 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14842 # Allocations should be stopped while this LU runs with node locks, but
14843 # the node allocation lock doesn't have to be held exclusively
14844 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14846 def Exec(self, feedback_fn):
14847 """Remove any export.
14850 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14851 # If the instance was not found we'll try with the name that was passed in.
14852 # This will only work if it was an FQDN, though.
14854 if not instance_name:
14856 instance_name = self.op.instance_name
14858 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14859 exportlist = self.rpc.call_export_list(locked_nodes)
14861 for node in exportlist:
14862 msg = exportlist[node].fail_msg
14864 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14866 if instance_name in exportlist[node].payload:
14868 result = self.rpc.call_export_remove(node, instance_name)
14869 msg = result.fail_msg
14871 logging.error("Could not remove export for instance %s"
14872 " on node %s: %s", instance_name, node, msg)
14874 if fqdn_warn and not found:
14875 feedback_fn("Export not found. If trying to remove an export belonging"
14876 " to a deleted instance please use its Fully Qualified"
14880 class LUGroupAdd(LogicalUnit):
14881 """Logical unit for creating node groups.
14884 HPATH = "group-add"
14885 HTYPE = constants.HTYPE_GROUP
14888 def ExpandNames(self):
14889 # We need the new group's UUID here so that we can create and acquire the
14890 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14891 # that it should not check whether the UUID exists in the configuration.
14892 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14893 self.needed_locks = {}
14894 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14896 def CheckPrereq(self):
14897 """Check prerequisites.
14899 This checks that the given group name is not an existing node group
14904 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14905 except errors.OpPrereqError:
14908 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14909 " node group (UUID: %s)" %
14910 (self.op.group_name, existing_uuid),
14911 errors.ECODE_EXISTS)
14913 if self.op.ndparams:
14914 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14916 if self.op.hv_state:
14917 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14919 self.new_hv_state = None
14921 if self.op.disk_state:
14922 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14924 self.new_disk_state = None
14926 if self.op.diskparams:
14927 for templ in constants.DISK_TEMPLATES:
14928 if templ in self.op.diskparams:
14929 utils.ForceDictType(self.op.diskparams[templ],
14930 constants.DISK_DT_TYPES)
14931 self.new_diskparams = self.op.diskparams
14933 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14934 except errors.OpPrereqError, err:
14935 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14936 errors.ECODE_INVAL)
14938 self.new_diskparams = {}
14940 if self.op.ipolicy:
14941 cluster = self.cfg.GetClusterInfo()
14942 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14944 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14945 except errors.ConfigurationError, err:
14946 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14947 errors.ECODE_INVAL)
14949 def BuildHooksEnv(self):
14950 """Build hooks env.
14954 "GROUP_NAME": self.op.group_name,
14957 def BuildHooksNodes(self):
14958 """Build hooks nodes.
14961 mn = self.cfg.GetMasterNode()
14962 return ([mn], [mn])
14964 def Exec(self, feedback_fn):
14965 """Add the node group to the cluster.
14968 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14969 uuid=self.group_uuid,
14970 alloc_policy=self.op.alloc_policy,
14971 ndparams=self.op.ndparams,
14972 diskparams=self.new_diskparams,
14973 ipolicy=self.op.ipolicy,
14974 hv_state_static=self.new_hv_state,
14975 disk_state_static=self.new_disk_state)
14977 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14978 del self.remove_locks[locking.LEVEL_NODEGROUP]
14981 class LUGroupAssignNodes(NoHooksLU):
14982 """Logical unit for assigning nodes to groups.
14987 def ExpandNames(self):
14988 # These raise errors.OpPrereqError on their own:
14989 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14990 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14992 # We want to lock all the affected nodes and groups. We have readily
14993 # available the list of nodes, and the *destination* group. To gather the
14994 # list of "source" groups, we need to fetch node information later on.
14995 self.needed_locks = {
14996 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14997 locking.LEVEL_NODE: self.op.nodes,
15000 def DeclareLocks(self, level):
15001 if level == locking.LEVEL_NODEGROUP:
15002 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15004 # Try to get all affected nodes' groups without having the group or node
15005 # lock yet. Needs verification later in the code flow.
15006 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15008 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15010 def CheckPrereq(self):
15011 """Check prerequisites.
15014 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15015 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15016 frozenset(self.op.nodes))
15018 expected_locks = (set([self.group_uuid]) |
15019 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15020 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15021 if actual_locks != expected_locks:
15022 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15023 " current groups are '%s', used to be '%s'" %
15024 (utils.CommaJoin(expected_locks),
15025 utils.CommaJoin(actual_locks)))
15027 self.node_data = self.cfg.GetAllNodesInfo()
15028 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15029 instance_data = self.cfg.GetAllInstancesInfo()
15031 if self.group is None:
15032 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15033 (self.op.group_name, self.group_uuid))
15035 (new_splits, previous_splits) = \
15036 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15037 for node in self.op.nodes],
15038 self.node_data, instance_data)
15041 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15043 if not self.op.force:
15044 raise errors.OpExecError("The following instances get split by this"
15045 " change and --force was not given: %s" %
15048 self.LogWarning("This operation will split the following instances: %s",
15051 if previous_splits:
15052 self.LogWarning("In addition, these already-split instances continue"
15053 " to be split across groups: %s",
15054 utils.CommaJoin(utils.NiceSort(previous_splits)))
15056 def Exec(self, feedback_fn):
15057 """Assign nodes to a new group.
15060 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15062 self.cfg.AssignGroupNodes(mods)
15065 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15066 """Check for split instances after a node assignment.
15068 This method considers a series of node assignments as an atomic operation,
15069 and returns information about split instances after applying the set of changes.
15072 In particular, it returns information about newly split instances, and
15073 instances that were already split and remain so after the change.
15075 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
15078 @type changes: list of (node_name, new_group_uuid) pairs.
15079 @param changes: list of node assignments to consider.
15080 @param node_data: a dict with data for all nodes
15081 @param instance_data: a dict with all instances to consider
15082 @rtype: a two-tuple
15083 @return: a list of instances that were previously OK and become split as a
15084 consequence of this change, and a list of instances that were already
15085 split and that this change does not fix.
15088 changed_nodes = dict((node, group) for node, group in changes
15089 if node_data[node].group != group)
15091 all_split_instances = set()
15092 previously_split_instances = set()
15094 def InstanceNodes(instance):
15095 return [instance.primary_node] + list(instance.secondary_nodes)
15097 for inst in instance_data.values():
15098 if inst.disk_template not in constants.DTS_INT_MIRROR:
15101 instance_nodes = InstanceNodes(inst)
15103 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15104 previously_split_instances.add(inst.name)
15106 if len(set(changed_nodes.get(node, node_data[node].group)
15107 for node in instance_nodes)) > 1:
15108 all_split_instances.add(inst.name)
15110 return (list(all_split_instances - previously_split_instances),
15111 list(previously_split_instances & all_split_instances))
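# A minimal usage sketch of the split computation above, with stand-in
# objects (illustrative only; real callers pass node and instance objects
# from the cluster configuration, and the group UUIDs here are made up):
def _SplitCheckSketch():
  """Illustrative only: one DRBD instance, its secondary changes group.

  """
  class _FakeNode(object):
    def __init__(self, group):
      self.group = group

  class _FakeInst(object):
    name = "inst1.example.com"
    disk_template = constants.DT_DRBD8
    primary_node = "node1"
    secondary_nodes = ["node2"]

  node_data = {"node1": _FakeNode("group-a"), "node2": _FakeNode("group-a")}
  instance_data = {_FakeInst.name: _FakeInst()}
  # Moving only the secondary to another group splits the instance, so
  # this returns (["inst1.example.com"], [])
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("node2", "group-b")], node_data, instance_data)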
15114 class _GroupQuery(_QueryBase):
15115 FIELDS = query.GROUP_FIELDS
15117 def ExpandNames(self, lu):
15118 lu.needed_locks = {}
15120 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15121 self._cluster = lu.cfg.GetClusterInfo()
15122 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15125 self.wanted = [name_to_uuid[name]
15126 for name in utils.NiceSort(name_to_uuid.keys())]
15128 # Accept names to be either names or UUIDs.
15131 all_uuid = frozenset(self._all_groups.keys())
15133 for name in self.names:
15134 if name in all_uuid:
15135 self.wanted.append(name)
15136 elif name in name_to_uuid:
15137 self.wanted.append(name_to_uuid[name])
15139 missing.append(name)
15142 raise errors.OpPrereqError("Some groups do not exist: %s" %
15143 utils.CommaJoin(missing),
15144 errors.ECODE_NOENT)
15146 def DeclareLocks(self, lu, level):
15149 def _GetQueryData(self, lu):
15150 """Computes the list of node groups and their attributes.
15153 do_nodes = query.GQ_NODE in self.requested_data
15154 do_instances = query.GQ_INST in self.requested_data
15156 group_to_nodes = None
15157 group_to_instances = None
15159 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15160 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15161 # latter GetAllInstancesInfo() is not enough, for we have to go through
15162 # instance->node. Hence, we will need to process nodes even if we only need
15163 # instance information.
15164 if do_nodes or do_instances:
15165 all_nodes = lu.cfg.GetAllNodesInfo()
15166 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15169 for node in all_nodes.values():
15170 if node.group in group_to_nodes:
15171 group_to_nodes[node.group].append(node.name)
15172 node_to_group[node.name] = node.group
15175 all_instances = lu.cfg.GetAllInstancesInfo()
15176 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15178 for instance in all_instances.values():
15179 node = instance.primary_node
15180 if node in node_to_group:
15181 group_to_instances[node_to_group[node]].append(instance.name)
15184 # Do not pass on node information if it was not requested.
15185 group_to_nodes = None
15187 return query.GroupQueryData(self._cluster,
15188 [self._all_groups[uuid]
15189 for uuid in self.wanted],
15190 group_to_nodes, group_to_instances,
15191 query.GQ_DISKPARAMS in self.requested_data)
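# Illustrative shapes of the two mappings built above (hypothetical
# values): group_to_nodes = {"group-uuid-1": ["node1", "node2"]} and
# group_to_instances = {"group-uuid-1": ["inst1.example.com"]}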
15194 class LUGroupQuery(NoHooksLU):
15195 """Logical unit for querying node groups.
15200 def CheckArguments(self):
15201 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15202 self.op.output_fields, False)
15204 def ExpandNames(self):
15205 self.gq.ExpandNames(self)
15207 def DeclareLocks(self, level):
15208 self.gq.DeclareLocks(self, level)
15210 def Exec(self, feedback_fn):
15211 return self.gq.OldStyleQuery(self)
15214 class LUGroupSetParams(LogicalUnit):
15215 """Modifies the parameters of a node group.
15218 HPATH = "group-modify"
15219 HTYPE = constants.HTYPE_GROUP
15222 def CheckArguments(self):
15225 self.op.diskparams,
15226 self.op.alloc_policy,
15228 self.op.disk_state,
15232 if all_changes.count(None) == len(all_changes):
15233 raise errors.OpPrereqError("Please pass at least one modification",
15234 errors.ECODE_INVAL)
15236 def ExpandNames(self):
15237 # This raises errors.OpPrereqError on its own:
15238 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15240 self.needed_locks = {
15241 locking.LEVEL_INSTANCE: [],
15242 locking.LEVEL_NODEGROUP: [self.group_uuid],
15245 self.share_locks[locking.LEVEL_INSTANCE] = 1
15247 def DeclareLocks(self, level):
15248 if level == locking.LEVEL_INSTANCE:
15249 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15251 # Lock instances optimistically, needs verification once group lock has
15253 self.needed_locks[locking.LEVEL_INSTANCE] = \
15254 self.cfg.GetNodeGroupInstances(self.group_uuid)
15257 def _UpdateAndVerifyDiskParams(old, new):
15258 """Updates and verifies disk parameters.
15261 new_params = _GetUpdatedParams(old, new)
15262 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15265 def CheckPrereq(self):
15266 """Check prerequisites.
15269 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15271 # Check if locked instances are still correct
15272 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15274 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15275 cluster = self.cfg.GetClusterInfo()
15277 if self.group is None:
15278 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15279 (self.op.group_name, self.group_uuid))
15281 if self.op.ndparams:
15282 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15283 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15284 self.new_ndparams = new_ndparams
15286 if self.op.diskparams:
15287 diskparams = self.group.diskparams
15288 uavdp = self._UpdateAndVerifyDiskParams
15289 # For each disk template, update and verify the values of its subdict
15290 new_diskparams = dict((dt,
15291 uavdp(diskparams.get(dt, {}),
15292 self.op.diskparams[dt]))
15293 for dt in constants.DISK_TEMPLATES
15294 if dt in self.op.diskparams)
15295 # Now that all diskparams subdicts are ready, merge the actual dict
15296 # with all updated subdicts
15297 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
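# E.g. (hypothetical values): with existing diskparams for "drbd" and
# "plain" and an opcode only updating "drbd", the merge keeps the
# untouched "plain" subdict and installs the updated "drbd" one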
15299 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15300 except errors.OpPrereqError, err:
15301 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15302 errors.ECODE_INVAL)
15304 if self.op.hv_state:
15305 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15306 self.group.hv_state_static)
15308 if self.op.disk_state:
15309 self.new_disk_state = \
15310 _MergeAndVerifyDiskState(self.op.disk_state,
15311 self.group.disk_state_static)
15313 if self.op.ipolicy:
15314 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15318 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15319 inst_filter = lambda inst: inst.name in owned_instances
15320 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15321 gmi = ganeti.masterd.instance
15323 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15325 new_ipolicy, instances)
15328 self.LogWarning("After the ipolicy change the following instances"
15329 " violate them: %s",
15330 utils.CommaJoin(violations))
15332 def BuildHooksEnv(self):
15333 """Build hooks env.
15337 "GROUP_NAME": self.op.group_name,
15338 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15341 def BuildHooksNodes(self):
15342 """Build hooks nodes.
15345 mn = self.cfg.GetMasterNode()
15346 return ([mn], [mn])
15348 def Exec(self, feedback_fn):
15349 """Modifies the node group.
15354 if self.op.ndparams:
15355 self.group.ndparams = self.new_ndparams
15356 result.append(("ndparams", str(self.group.ndparams)))
15358 if self.op.diskparams:
15359 self.group.diskparams = self.new_diskparams
15360 result.append(("diskparams", str(self.group.diskparams)))
15362 if self.op.alloc_policy:
15363 self.group.alloc_policy = self.op.alloc_policy
15365 if self.op.hv_state:
15366 self.group.hv_state_static = self.new_hv_state
15368 if self.op.disk_state:
15369 self.group.disk_state_static = self.new_disk_state
15371 if self.op.ipolicy:
15372 self.group.ipolicy = self.new_ipolicy
15374 self.cfg.Update(self.group, feedback_fn)
15378 class LUGroupRemove(LogicalUnit):
15379 HPATH = "group-remove"
15380 HTYPE = constants.HTYPE_GROUP
15383 def ExpandNames(self):
15384 # This raises errors.OpPrereqError on its own:
15385 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15386 self.needed_locks = {
15387 locking.LEVEL_NODEGROUP: [self.group_uuid],
15390 def CheckPrereq(self):
15391 """Check prerequisites.
15393 This checks that the given group name exists as a node group, that it is
15394 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
15398 # Verify that the group is empty.
15399 group_nodes = [node.name
15400 for node in self.cfg.GetAllNodesInfo().values()
15401 if node.group == self.group_uuid]
15404 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15406 (self.op.group_name,
15407 utils.CommaJoin(utils.NiceSort(group_nodes))),
15408 errors.ECODE_STATE)
15410 # Verify the cluster would not be left group-less.
15411 if len(self.cfg.GetNodeGroupList()) == 1:
15412 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15413 " removed" % self.op.group_name,
15414 errors.ECODE_STATE)
15416 def BuildHooksEnv(self):
15417 """Build hooks env.
15421 "GROUP_NAME": self.op.group_name,
15424 def BuildHooksNodes(self):
15425 """Build hooks nodes.
15428 mn = self.cfg.GetMasterNode()
15429 return ([mn], [mn])
15431 def Exec(self, feedback_fn):
15432 """Remove the node group.
15436 self.cfg.RemoveNodeGroup(self.group_uuid)
15437 except errors.ConfigurationError:
15438 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15439 (self.op.group_name, self.group_uuid))
15441 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15444 class LUGroupRename(LogicalUnit):
15445 HPATH = "group-rename"
15446 HTYPE = constants.HTYPE_GROUP
15449 def ExpandNames(self):
15450 # This raises errors.OpPrereqError on its own:
15451 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15453 self.needed_locks = {
15454 locking.LEVEL_NODEGROUP: [self.group_uuid],
15457 def CheckPrereq(self):
15458 """Check prerequisites.
15460 Ensures the requested new name is not already in use.
15464 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15465 except errors.OpPrereqError:
15468 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15469 " node group (UUID: %s)" %
15470 (self.op.new_name, new_name_uuid),
15471 errors.ECODE_EXISTS)
15473 def BuildHooksEnv(self):
15474 """Build hooks env.
15478 "OLD_NAME": self.op.group_name,
15479 "NEW_NAME": self.op.new_name,
15482 def BuildHooksNodes(self):
15483 """Build hooks nodes.
15486 mn = self.cfg.GetMasterNode()
15488 all_nodes = self.cfg.GetAllNodesInfo()
15489 all_nodes.pop(mn, None)
15492 run_nodes.extend(node.name for node in all_nodes.values()
15493 if node.group == self.group_uuid)
15495 return (run_nodes, run_nodes)
15497 def Exec(self, feedback_fn):
15498 """Rename the node group.
15501 group = self.cfg.GetNodeGroup(self.group_uuid)
15504 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15505 (self.op.group_name, self.group_uuid))
15507 group.name = self.op.new_name
15508 self.cfg.Update(group, feedback_fn)
15510 return self.op.new_name
15513 class LUGroupEvacuate(LogicalUnit):
15514 HPATH = "group-evacuate"
15515 HTYPE = constants.HTYPE_GROUP
15518 def ExpandNames(self):
15519 # This raises errors.OpPrereqError on its own:
15520 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15522 if self.op.target_groups:
15523 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15524 self.op.target_groups)
15526 self.req_target_uuids = []
15528 if self.group_uuid in self.req_target_uuids:
15529 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15530 " as a target group (targets are %s)" %
15532 utils.CommaJoin(self.req_target_uuids)),
15533 errors.ECODE_INVAL)
15535 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15537 self.share_locks = _ShareAll()
15538 self.needed_locks = {
15539 locking.LEVEL_INSTANCE: [],
15540 locking.LEVEL_NODEGROUP: [],
15541 locking.LEVEL_NODE: [],
15544 def DeclareLocks(self, level):
15545 if level == locking.LEVEL_INSTANCE:
15546 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15548 # Lock instances optimistically, needs verification once node and group
15549 # locks have been acquired
15550 self.needed_locks[locking.LEVEL_INSTANCE] = \
15551 self.cfg.GetNodeGroupInstances(self.group_uuid)
15553 elif level == locking.LEVEL_NODEGROUP:
15554 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15556 if self.req_target_uuids:
15557 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15559 # Lock all groups used by instances optimistically; this requires going
15560 # via the node before it's locked, requiring verification later on
15561 lock_groups.update(group_uuid
15562 for instance_name in
15563 self.owned_locks(locking.LEVEL_INSTANCE)
15565 self.cfg.GetInstanceNodeGroups(instance_name))
15567 # No target groups, need to lock all of them
15568 lock_groups = locking.ALL_SET
15570 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15572 elif level == locking.LEVEL_NODE:
15573 # This will only lock the nodes in the group to be evacuated that
15574 # contain actual instances
15575 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15576 self._LockInstancesNodes()
15578 # Lock all nodes in group to be evacuated and target groups
15579 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15580 assert self.group_uuid in owned_groups
15581 member_nodes = [node_name
15582 for group in owned_groups
15583 for node_name in self.cfg.GetNodeGroup(group).members]
15584 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15586 def CheckPrereq(self):
15587 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15588 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15589 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15591 assert owned_groups.issuperset(self.req_target_uuids)
15592 assert self.group_uuid in owned_groups
15594 # Check if locked instances are still correct
15595 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15597 # Get instance information
15598 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15600 # Check if node groups for locked instances are still correct
15601 _CheckInstancesNodeGroups(self.cfg, self.instances,
15602 owned_groups, owned_nodes, self.group_uuid)
15604 if self.req_target_uuids:
15605 # User requested specific target groups
15606 self.target_uuids = self.req_target_uuids
15608 # All groups except the one to be evacuated are potential targets
15609 self.target_uuids = [group_uuid for group_uuid in owned_groups
15610 if group_uuid != self.group_uuid]
15612 if not self.target_uuids:
15613 raise errors.OpPrereqError("There are no possible target groups",
15614 errors.ECODE_INVAL)
15616 def BuildHooksEnv(self):
15617 """Build hooks env.
15621 "GROUP_NAME": self.op.group_name,
15622 "TARGET_GROUPS": " ".join(self.target_uuids),
15625 def BuildHooksNodes(self):
15626 """Build hooks nodes.
15629 mn = self.cfg.GetMasterNode()
15631 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15633 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15635 return (run_nodes, run_nodes)
15637 def Exec(self, feedback_fn):
15638 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15640 assert self.group_uuid not in self.target_uuids
15642 req = iallocator.IAReqGroupChange(instances=instances,
15643 target_groups=self.target_uuids)
15644 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15646 ial.Run(self.op.iallocator)
15648 if not ial.success:
15649 raise errors.OpPrereqError("Can't compute group evacuation using"
15650 " iallocator '%s': %s" %
15651 (self.op.iallocator, ial.info),
15652 errors.ECODE_NORES)
15654 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15656 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15657 len(jobs), self.op.group_name)
15659 return ResultWithJobs(jobs)
15662 class TagsLU(NoHooksLU): # pylint: disable=W0223
15663 """Generic tags LU.
15665 This is an abstract class which is the parent of all the other tags LUs.
15668 def ExpandNames(self):
15669 self.group_uuid = None
15670 self.needed_locks = {}
15672 if self.op.kind == constants.TAG_NODE:
15673 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15674 lock_level = locking.LEVEL_NODE
15675 lock_name = self.op.name
15676 elif self.op.kind == constants.TAG_INSTANCE:
15677 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15678 lock_level = locking.LEVEL_INSTANCE
15679 lock_name = self.op.name
15680 elif self.op.kind == constants.TAG_NODEGROUP:
15681 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15682 lock_level = locking.LEVEL_NODEGROUP
15683 lock_name = self.group_uuid
15684 elif self.op.kind == constants.TAG_NETWORK:
15685 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15686 lock_level = locking.LEVEL_NETWORK
15687 lock_name = self.network_uuid
15692 if lock_level and getattr(self.op, "use_locking", True):
15693 self.needed_locks[lock_level] = lock_name
15695 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15696 # not possible to acquire the BGL based on opcode parameters)
15698 def CheckPrereq(self):
15699 """Check prerequisites.
15702 if self.op.kind == constants.TAG_CLUSTER:
15703 self.target = self.cfg.GetClusterInfo()
15704 elif self.op.kind == constants.TAG_NODE:
15705 self.target = self.cfg.GetNodeInfo(self.op.name)
15706 elif self.op.kind == constants.TAG_INSTANCE:
15707 self.target = self.cfg.GetInstanceInfo(self.op.name)
15708 elif self.op.kind == constants.TAG_NODEGROUP:
15709 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15710 elif self.op.kind == constants.TAG_NETWORK:
15711 self.target = self.cfg.GetNetwork(self.network_uuid)
15713 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15714 str(self.op.kind), errors.ECODE_INVAL)
15717 class LUTagsGet(TagsLU):
15718 """Returns the tags of a given object.
15723 def ExpandNames(self):
15724 TagsLU.ExpandNames(self)
15726 # Share locks as this is only a read operation
15727 self.share_locks = _ShareAll()
15729 def Exec(self, feedback_fn):
15730 """Returns the tag list.
15733 return list(self.target.GetTags())
15736 class LUTagsSearch(NoHooksLU):
15737 """Searches the tags for a given pattern.
15742 def ExpandNames(self):
15743 self.needed_locks = {}
15745 def CheckPrereq(self):
15746 """Check prerequisites.
15748 This checks the pattern passed for validity by compiling it.
15752 self.re = re.compile(self.op.pattern)
15753 except re.error, err:
15754 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15755 (self.op.pattern, err), errors.ECODE_INVAL)
15757 def Exec(self, feedback_fn):
15758 """Returns the tag list.
15762 tgts = [("/cluster", cfg.GetClusterInfo())]
15763 ilist = cfg.GetAllInstancesInfo().values()
15764 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15765 nlist = cfg.GetAllNodesInfo().values()
15766 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15767 tgts.extend(("/nodegroup/%s" % n.name, n)
15768 for n in cfg.GetAllNodeGroupsInfo().values())
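# Each match is collected as a (path, tag) pair; illustrative result for
# a hypothetical pattern "^env:":
#   [("/cluster", "env:prod"), ("/instances/inst1.example.com", "env:prod")]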
15770 for path, target in tgts:
15771 for tag in target.GetTags():
15772 if self.re.search(tag):
15773 results.append((path, tag))
15777 class LUTagsSet(TagsLU):
15778 """Sets a tag on a given object.
15783 def CheckPrereq(self):
15784 """Check prerequisites.
15786 This checks the type and length of the tag name and value.
15789 TagsLU.CheckPrereq(self)
15790 for tag in self.op.tags:
15791 objects.TaggableObject.ValidateTag(tag)
15793 def Exec(self, feedback_fn):
15798 for tag in self.op.tags:
15799 self.target.AddTag(tag)
15800 except errors.TagError, err:
15801 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15802 self.cfg.Update(self.target, feedback_fn)
15805 class LUTagsDel(TagsLU):
15806 """Delete a list of tags from a given object.
15811 def CheckPrereq(self):
15812 """Check prerequisites.
15814 This checks that we have the given tag.
15817 TagsLU.CheckPrereq(self)
15818 for tag in self.op.tags:
15819 objects.TaggableObject.ValidateTag(tag)
15820 del_tags = frozenset(self.op.tags)
15821 cur_tags = self.target.GetTags()
15823 diff_tags = del_tags - cur_tags
15825 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15826 raise errors.OpPrereqError("Tag(s) %s not found" %
15827 (utils.CommaJoin(diff_names), ),
15828 errors.ECODE_NOENT)
15830 def Exec(self, feedback_fn):
15831 """Remove the tag from the object.
15834 for tag in self.op.tags:
15835 self.target.RemoveTag(tag)
15836 self.cfg.Update(self.target, feedback_fn)
15839 class LUTestDelay(NoHooksLU):
15840 """Sleep for a specified amount of time.
15842 This LU sleeps on the master and/or nodes for a specified amount of time.
15848 def ExpandNames(self):
15849 """Expand names and set required locks.
15851 This expands the node list, if any.
15854 self.needed_locks = {}
15855 if self.op.on_nodes:
15856 # _GetWantedNodes can be used here, but is not always appropriate to use
15857 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15858 # more information.
15859 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15860 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15862 def _TestDelay(self):
15863 """Do the actual sleep.
15866 if self.op.on_master:
15867 if not utils.TestDelay(self.op.duration):
15868 raise errors.OpExecError("Error during master delay test")
15869 if self.op.on_nodes:
15870 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15871 for node, node_result in result.items():
15872 node_result.Raise("Failure during rpc call to node %s" % node)
15874 def Exec(self, feedback_fn):
15875 """Execute the test delay opcode, with the wanted repetitions.
15878 if self.op.repeat == 0:
15881 top_value = self.op.repeat - 1
15882 for i in range(self.op.repeat):
15883 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15887 class LURestrictedCommand(NoHooksLU):
15888 """Logical unit for executing restricted commands.
15893 def ExpandNames(self):
15895 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15897 self.needed_locks = {
15898 locking.LEVEL_NODE: self.op.nodes,
15900 self.share_locks = {
15901 locking.LEVEL_NODE: not self.op.use_locking,
15904 def CheckPrereq(self):
15905 """Check prerequisites.
15909 def Exec(self, feedback_fn):
15910 """Execute restricted command and return output.
15913 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15915 # Check if correct locks are held
15916 assert set(self.op.nodes).issubset(owned_nodes)
15918 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15922 for node_name in self.op.nodes:
15923 nres = rpcres[node_name]
15925 msg = ("Command '%s' on node '%s' failed: %s" %
15926 (self.op.command, node_name, nres.fail_msg))
15927 result.append((False, msg))
15929 result.append((True, nres.payload))
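# Illustrative shape of the result built above, one (success, output or
# error message) pair per node in self.op.nodes:
#   [(True, "some output"), (False, "Command 'x' on node 'n2' failed: ...")]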
15934 class LUTestJqueue(NoHooksLU):
15935 """Utility LU to test some aspects of the job queue.
15940 # Must be lower than default timeout for WaitForJobChange to see whether it
15941 # notices changed jobs
15942 _CLIENT_CONNECT_TIMEOUT = 20.0
15943 _CLIENT_CONFIRM_TIMEOUT = 60.0
15946 def _NotifyUsingSocket(cls, cb, errcls):
15947 """Opens a Unix socket and waits for another program to connect.
15950 @param cb: Callback to send socket name to client
15951 @type errcls: class
15952 @param errcls: Exception class to use for errors
15955 # Using a temporary directory as there's no easy way to create temporary
15956 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
15958 tmpdir = tempfile.mkdtemp()
15960 tmpsock = utils.PathJoin(tmpdir, "sock")
15962 logging.debug("Creating temporary socket at %s", tmpsock)
15963 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15968 # Send details to client
15971 # Wait for client to connect before continuing
15972 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15974 (conn, _) = sock.accept()
15975 except socket.error, err:
15976 raise errcls("Client didn't connect in time (%s)" % err)
15980 # Remove as soon as client is connected
15981 shutil.rmtree(tmpdir)
15983 # Wait for client to close
15986 # pylint: disable=E1101
15987 # Instance of '_socketobject' has no ... member
15988 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15990 except socket.error, err:
15991 raise errcls("Client failed to confirm notification (%s)" % err)
15995 def _SendNotification(self, test, arg, sockname):
15996 """Sends a notification to the client.
15999 @param test: Test name
16000 @param arg: Test argument (depends on test)
16001 @type sockname: string
16002 @param sockname: Socket path
16005 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16007 def _Notify(self, prereq, test, arg):
16008 """Notifies the client of a test.
16011 @param prereq: Whether this is a prereq-phase test
16013 @param test: Test name
16014 @param arg: Test argument (depends on test)
16018 errcls = errors.OpPrereqError
16020 errcls = errors.OpExecError
16022 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16026 def CheckArguments(self):
16027 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16028 self.expandnames_calls = 0
16030 def ExpandNames(self):
16031 checkargs_calls = getattr(self, "checkargs_calls", 0)
16032 if checkargs_calls < 1:
16033 raise errors.ProgrammerError("CheckArguments was not called")
16035 self.expandnames_calls += 1
16037 if self.op.notify_waitlock:
16038 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16040 self.LogInfo("Expanding names")
16042 # Get lock on master node (just to get a lock, not for a particular reason)
16043 self.needed_locks = {
16044 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16047 def Exec(self, feedback_fn):
16048 if self.expandnames_calls < 1:
16049 raise errors.ProgrammerError("ExpandNames was not called")
16051 if self.op.notify_exec:
16052 self._Notify(False, constants.JQT_EXEC, None)
16054 self.LogInfo("Executing")
16056 if self.op.log_messages:
16057 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16058 for idx, msg in enumerate(self.op.log_messages):
16059 self.LogInfo("Sending log message %s", idx + 1)
16060 feedback_fn(constants.JQT_MSGPREFIX + msg)
16061 # Report how many test messages have been sent
16062 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16065 raise errors.OpExecError("Opcode failure was requested")
16070 class LUTestAllocator(NoHooksLU):
16071 """Run allocator tests.
16073 This LU runs the allocator tests
16076 def CheckPrereq(self):
16077 """Check prerequisites.
16079 This checks the opcode parameters depending on the direction and mode of the test.
16082 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16083 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16084 for attr in ["memory", "disks", "disk_template",
16085 "os", "tags", "nics", "vcpus"]:
16086 if not hasattr(self.op, attr):
16087 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16088 attr, errors.ECODE_INVAL)
16089 iname = self.cfg.ExpandInstanceName(self.op.name)
16090 if iname is not None:
16091 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16092 iname, errors.ECODE_EXISTS)
16093 if not isinstance(self.op.nics, list):
16094 raise errors.OpPrereqError("Invalid parameter 'nics'",
16095 errors.ECODE_INVAL)
16096 if not isinstance(self.op.disks, list):
16097 raise errors.OpPrereqError("Invalid parameter 'disks'",
16098 errors.ECODE_INVAL)
16099 for row in self.op.disks:
16100 if (not isinstance(row, dict) or
16101 constants.IDISK_SIZE not in row or
16102 not isinstance(row[constants.IDISK_SIZE], int) or
16103 constants.IDISK_MODE not in row or
16104 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16105 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16106 " parameter", errors.ECODE_INVAL)
16107 if self.op.hypervisor is None:
16108 self.op.hypervisor = self.cfg.GetHypervisorType()
16109 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16110 fname = _ExpandInstanceName(self.cfg, self.op.name)
16111 self.op.name = fname
16112 self.relocate_from = \
16113 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16114 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16115 constants.IALLOCATOR_MODE_NODE_EVAC):
16116 if not self.op.instances:
16117 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16118 self.op.instances = _GetWantedInstances(self, self.op.instances)
16120 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16121 self.op.mode, errors.ECODE_INVAL)
16123 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16124 if self.op.iallocator is None:
16125 raise errors.OpPrereqError("Missing allocator name",
16126 errors.ECODE_INVAL)
16127 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16128 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16129 self.op.direction, errors.ECODE_INVAL)
16131 def Exec(self, feedback_fn):
16132 """Run the allocator test.
16135 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16136 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16137 memory=self.op.memory,
16138 disks=self.op.disks,
16139 disk_template=self.op.disk_template,
16143 vcpus=self.op.vcpus,
16144 spindle_use=self.op.spindle_use,
16145 hypervisor=self.op.hypervisor)
16146 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16147 req = iallocator.IAReqRelocate(name=self.op.name,
16148 relocate_from=list(self.relocate_from))
16149 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16150 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16151 target_groups=self.op.target_groups)
16152 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16153 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16154 evac_mode=self.op.evac_mode)
16155 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16156 disk_template = self.op.disk_template
16157 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16158 memory=self.op.memory,
16159 disks=self.op.disks,
16160 disk_template=disk_template,
16164 vcpus=self.op.vcpus,
16165 spindle_use=self.op.spindle_use,
16166 hypervisor=self.op.hypervisor)
16167 for idx in range(self.op.count)]
16168 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16170 raise errors.ProgrammerError("Uncatched mode %s in"
16171 " LUTestAllocator.Exec", self.op.mode)
16173 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16174 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16175 result = ial.in_text
16177 ial.Run(self.op.iallocator, validate=False)
16178 result = ial.out_text
16182 class LUNetworkAdd(LogicalUnit):
16183 """Logical unit for creating networks.
16186 HPATH = "network-add"
16187 HTYPE = constants.HTYPE_NETWORK
16190 def BuildHooksNodes(self):
16191 """Build hooks nodes.
16194 mn = self.cfg.GetMasterNode()
16195 return ([mn], [mn])
16197 def CheckArguments(self):
16198 if self.op.mac_prefix:
16199 self.op.mac_prefix = \
16200 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16202 def ExpandNames(self):
16203 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16205 if self.op.conflicts_check:
16206 self.share_locks[locking.LEVEL_NODE] = 1
16207 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16208 self.needed_locks = {
16209 locking.LEVEL_NODE: locking.ALL_SET,
16210 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16213 self.needed_locks = {}
16215 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16217 def CheckPrereq(self):
16218 if self.op.network is None:
16219 raise errors.OpPrereqError("Network must be given",
16220 errors.ECODE_INVAL)
16222 uuid = self.cfg.LookupNetwork(self.op.network_name)
16225 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16226 self.op.network_name), errors.ECODE_EXISTS)
16228 # Check tag validity
16229 for tag in self.op.tags:
16230 objects.TaggableObject.ValidateTag(tag)
16232 def BuildHooksEnv(self):
16233 """Build hooks env.
16237 "name": self.op.network_name,
16238 "subnet": self.op.network,
16239 "gateway": self.op.gateway,
16240 "network6": self.op.network6,
16241 "gateway6": self.op.gateway6,
16242 "mac_prefix": self.op.mac_prefix,
16243 "network_type": self.op.network_type,
16244 "tags": self.op.tags,
16246 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16248 def Exec(self, feedback_fn):
16249 """Add the ip pool to the cluster.
16252 nobj = objects.Network(name=self.op.network_name,
16253 network=self.op.network,
16254 gateway=self.op.gateway,
16255 network6=self.op.network6,
16256 gateway6=self.op.gateway6,
16257 mac_prefix=self.op.mac_prefix,
16258 network_type=self.op.network_type,
16259 uuid=self.network_uuid,
16260 family=constants.IP4_VERSION)
16261 # Initialize the associated address pool
16263 pool = network.AddressPool.InitializeNetwork(nobj)
16264 except errors.AddressPoolError, e:
16265 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16267 # Check if we need to reserve the nodes and the cluster master IP
16268 # These may not be allocated to any instances in routed mode, as
16269 # they wouldn't function anyway.
16270 if self.op.conflicts_check:
16271 for node in self.cfg.GetAllNodesInfo().values():
16272 for ip in [node.primary_ip, node.secondary_ip]:
16274 if pool.Contains(ip):
16276 self.LogInfo("Reserved IP address of node '%s' (%s)",
16278 except errors.AddressPoolError:
16279 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16282 master_ip = self.cfg.GetClusterInfo().master_ip
16284 if pool.Contains(master_ip):
16285 pool.Reserve(master_ip)
16286 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16287 except errors.AddressPoolError:
16288 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16291 if self.op.add_reserved_ips:
16292 for ip in self.op.add_reserved_ips:
16294 pool.Reserve(ip, external=True)
16295 except errors.AddressPoolError, e:
16296 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16299 for tag in self.op.tags:
16302 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16303 del self.remove_locks[locking.LEVEL_NETWORK]
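# Minimal sketch of the address-pool calls used above (illustrative only;
# the network name, subnet and gateway values are made up and the object
# is never added to the cluster configuration):
def _AddressPoolSketch():
  """Illustrative only: initialize a pool and reserve the gateway.

  """
  net = objects.Network(name="sketch-net", network="192.0.2.0/24",
                        gateway="192.0.2.1",
                        family=constants.IP4_VERSION)
  pool = network.AddressPool.InitializeNetwork(net)
  if pool.Contains(net.gateway):
    pool.Reserve(net.gateway, external=True)
  return pool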
16306 class LUNetworkRemove(LogicalUnit):
16307 HPATH = "network-remove"
16308 HTYPE = constants.HTYPE_NETWORK
16311 def ExpandNames(self):
16312 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16314 if not self.network_uuid:
16315 raise errors.OpPrereqError(("Network '%s' not found" %
16316 self.op.network_name), errors.ECODE_NOENT)
16318 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16319 self.needed_locks = {
16320 locking.LEVEL_NETWORK: [self.network_uuid],
16321 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16324 def CheckPrereq(self):
16325 """Check prerequisites.
16327 This checks that the given network exists and that it is not connected
16328 to (i.e., in use by) any node group.
16332 # Verify that the network is not connected to any node group.
16333 node_groups = [group.name
16334 for group in self.cfg.GetAllNodeGroupsInfo().values()
16335 if self.network_uuid in group.networks]
16338 self.LogWarning("Network '%s' is connected to the following"
16339 " node groups: %s" %
16340 (self.op.network_name,
16341 utils.CommaJoin(utils.NiceSort(node_groups))))
16342 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "NETWORK_NAME": self.op.network_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    try:
      self.cfg.RemoveNetwork(self.network_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.network_type = self.network.network_type
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.network_type:
      if self.op.network_type == constants.VALUE_NONE:
        self.network_type = None
      else:
        self.network_type = self.op.network_type

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6
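
  # Illustrative note (not part of the original module): every parameter
  # handled above is tri-state -- omitted in the opcode means "keep the
  # current value", the literal constants.VALUE_NONE means "clear it", and
  # anything else replaces the value. E.g. a hypothetical call:
  #
  #   opcodes.OpNetworkSetParams(network_name="example-net",
  #                              gateway=constants.VALUE_NONE)
  #
  # would clear the gateway while leaving all other parameters untouched.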

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "network_type": self.network_type,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    if self.op.network_type:
      self.network.network_type = self.network_type

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
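
# Illustrative sketch (not part of the original module): when the gateway
# changes, Exec above swaps the external reservation so the pool stays
# consistent. With made-up addresses:
#
#   pool.Reserve("192.0.2.2", external=True)    # new gateway
#   pool.Release("192.0.2.1", external=True)    # old gateway
#   net.gateway = "192.0.2.2"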


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
              break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
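
# Illustrative example (not part of the original module): for a small
# 192.0.2.0/29 network whose network/broadcast addresses and gateway are
# externally reserved, the stats computed by _GetStats might look roughly
# like this (the exact map layout depends on the AddressPool
# implementation):
#
#   {
#     "free_count": 5,
#     "reserved_count": 3,
#     "map": "XX.....X",
#     "external_reservations": "192.0.2.0, 192.0.2.1, 192.0.2.7",
#   }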


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
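
# Illustrative note (not part of the original module): assuming qlang's
# simple-filter semantics, the name list handed to _NetworkQuery above is
# turned into an OR of equality comparisons, e.g.:
#
#   qlang.MakeSimpleFilter("name", ["net1", "net2"])
#   # -> [qlang.OP_OR, [qlang.OP_EQUAL, "name", "net1"],
#   #                  [qlang.OP_EQUAL, "name", "net2"]]
#
# while an empty name list yields None, i.e. no filtering.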


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
      return

    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
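
# Illustrative note (not part of the original module): connecting a network
# stores a NIC-parameter dict on the node group, from which instance NICs
# using the network can inherit mode and link. With a made-up link name:
#
#   group.networks[net_uuid] = {
#     constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#     constants.NIC_LINK: "br0",
#     }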


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)

  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
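
# Illustrative example (not part of the original module), derived directly
# from the format string above:
#
#   _FmtNetworkConflict([(0, "192.0.2.10"), (2, "192.0.2.11")])
#   # -> "nic0/192.0.2.10, nic2/192.0.2.11"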


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
      return

    if self.op.conflicts_check:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                            "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
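
# Illustrative example (not part of the original module): the lookup above
# dispatches a query resource name to its _QueryBase subclass, e.g.:
#
#   impl = _GetQueryImplementation(constants.QR_NETWORK)  # -> _NetworkQuery
#
# while an unknown resource name raises OpPrereqError with ECODE_INVAL.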


def _CheckForConflictingIp(lu, ip, node):
  """In case of conflicting IP address raise error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
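
# Illustrative note (not part of the original module): callers would use
# this as a guard before assigning an IP to a NIC; with made-up values:
#
#   _CheckForConflictingIp(lu, "192.0.2.10", "node1.example.com")
#   # raises OpPrereqError if 192.0.2.10 falls inside a network that is
#   # connected to node1's node group; otherwise returns (None, None)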