# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar

# C0302: since we have waaaay too many lines in this module

# Standard library / third-party modules used by the code below
import OpenSSL

import copy
import itertools
import logging

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,


  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects
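
    Example (illustrative only; any opcode(s) could be queued this way, and
    the extra keyword argument is just a placeholder)::

      # in some LU's Exec()
      return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                            custom_value="example")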


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op

    """
    self.proc = processor
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same

    self.needed_locks = {} # Exclusive LUs don't need locks.

    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}
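
    A typical implementation (illustrative; it mirrors the usage shown in the
    L{_LockInstancesNodes} docstring) looks like::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()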

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))

  def Exec(self, feedback_fn):
    """
    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))

    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. If no nodes are to be returned, an
      empty list should be used (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
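
  # Illustrative usage sketch (hypothetical LU): an instance-level LU would
  # typically call the helper from ExpandNames and then recalculate the node
  # locks in DeclareLocks, e.g.:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()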

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq

  """
  def __init__(self, lu):

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or

    """
    raise NotImplementedError


  """Base for query utility classes.

  """
  #: Attribute holding field definitions

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    names = lu.owned_locks(lock_level)

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order

    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               utils.CommaJoin(inst_groups),
                               utils.CommaJoin(owned_groups)),


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               utils.CommaJoin(wanted_instances),
                               utils.CommaJoin(owned_instances)),

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      params_copy[key] = val
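
# Illustrative example of the update semantics above (made-up values):
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   -> {"b": 2, "c": 3}   # "a" is dropped because use_default is True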


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_default=use_default)
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
      if key in constants.IPOLICY_PARAMETERS:
        # FIXME: we assume all such values are float
        try:
          ipolicy[key] = float(value)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid value for attribute"
                                     " '%s': '%s', error: %s" %
                                     (key, value, err), errors.ECODE_INVAL)
        # FIXME: we assume all others are lists; this should be redone
        ipolicy[key] = list(value)

  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
    if obj_input is None:
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                for key, value in op_input.items())


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
    should_release = lambda name: name not in keep
    should_release = None

  owned = lu.owned_locks(level)
    # Not owning any lock at this level, do nothing

    # Determine which locks to release
      if should_release(name):

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)

    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
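
# Usage sketch (illustrative): once an LU knows which node locks it still
# needs, it can drop the remaining ones, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[instance.primary_node] + list(instance.secondary_nodes))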


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  delta = f.NonMatching(selected)
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
      lu.LogWarning("Primary node offline, ignoring check that instance"


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
      fqn = "%s/%s" % (name, qualifier)
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
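
# Illustrative example (made-up policy values):
#
#   pol = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#          constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 1024}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, pol, 2048)
#   -> "... value 2048 is not in range [128, 1024]"
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, pol, 512)
#   -> None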


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @param ipolicy: The ipolicy
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:

    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))

      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @param tags: the tags of the network

  """
    env["NETWORK_NAME"] = name
    env["NETWORK_SUBNET"] = subnet
    env["NETWORK_GATEWAY"] = gateway
    env["NETWORK_SUBNET6"] = network6
    env["NETWORK_GATEWAY6"] = gateway6
    env["NETWORK_MAC_PREFIX"] = mac_prefix
    env["NETWORK_TYPE"] = network_type
    env["NETWORK_TAGS"] = " ".join(tags)


def _BuildNetworkHookEnvByObject(net):
  """Builds network related env variables for hooks

  @type net: L{objects.Network}
  @param net: the network object

  """
    "subnet": net.network,
    "gateway": net.gateway,
    "network6": net.network6,
    "gateway6": net.gateway6,
    "network_type": net.network_type,
    "mac_prefix": net.mac_prefix,

  return _BuildNetworkHookEnv(**args) # pylint: disable=W0142


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link, network) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @param tags: list of instance tags as strings
  @return: the hook environment for this instance

  """
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,

  nic_count = len(nics)
  for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
    env["INSTANCE_NIC%d_IP" % idx] = ip
    env["INSTANCE_NIC%d_MAC" % idx] = mac
    env["INSTANCE_NIC%d_MODE" % idx] = mode
    env["INSTANCE_NIC%d_LINK" % idx] = link
    env["INSTANCE_NIC%d_NETWORK" % idx] = net
    nobj = objects.Network.FromDict(netinfo)
    env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
    env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
    env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
    env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
    env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
    if nobj.network_type:
      env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
    env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
    if mode == constants.NIC_MODE_BRIDGED:
      env["INSTANCE_NIC%d_BRIDGE" % idx] = link

  env["INSTANCE_NIC_COUNT"] = nic_count

  disk_count = len(disks)
  for idx, (size, mode) in enumerate(disks):
    env["INSTANCE_DISK%d_SIZE" % idx] = size
    env["INSTANCE_DISK%d_MODE" % idx] = mode

  env["INSTANCE_DISK_COUNT"] = disk_count

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net_uuid = lu.cfg.LookupNetwork(net)
  nobj = lu.cfg.GetNetwork(net_uuid)
  netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
    hooks_nics.append(_NICToTuple(lu, nic))


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
  @type override: dict
  @param override: dictionary with key/values that will override
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
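
# Worked example (illustrative numbers): with candidate_pool_size = 10, three
# current candidates and five that should exist, mc_should becomes
# min(5 + 1, 10) = 6, and 3 < 6 is true, so the node would promote itself.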


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes the set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
    raise errors.OpPrereqError("OS name must include a variant",

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
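
# Usage sketch (illustrative): an LU accepting either an iallocator or an
# explicit target node would call, e.g. from CheckArguments,
#
#   _CheckIAllocatorOrNode(self, "iallocator", "node")
#
# after which self.op.iallocator is filled with the cluster-wide default
# whenever neither value was given and such a default exists.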


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @return: Iallocator name

  """
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
      self.LogWarning("Error disabling the master IP address: %s",

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

    fnamemsg = "While verifying %s: %s" % (filename, msg)

    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
1961 class _VerifyErrors(object):
1962 """Mix-in for cluster/group verify LUs.
1964 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1965 self.op and self._feedback_fn to be available.)
1969 ETYPE_FIELD = "code"
1970 ETYPE_ERROR = "ERROR"
1971 ETYPE_WARNING = "WARNING"
1973 def _Error(self, ecode, item, msg, *args, **kwargs):
1974 """Format an error message.
1976 Based on the opcode's error_codes parameter, either format a
1977 parseable error code, or a simpler error string.
1979 This must be called only from Exec and functions called from Exec.
1982 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1983 itype, etxt, _ = ecode
1984 # first complete the msg
1987 # then format the whole message
1988 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1989 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1995 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1996 # and finally report it via the feedback_fn
1997 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1999 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2000 """Log an error message if the passed condition is True.
2004 or self.op.debug_simulate_errors) # pylint: disable=E1101
2006 # If the error code is in the list of ignored errors, demote the error to a warning
2008 (_, etxt, _) = ecode
2009 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2010 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2013 self._Error(ecode, *args, **kwargs)
2015 # do not mark the operation as failed for WARN cases only
2016 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2017 self.bad = self.bad or cond
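# Hedged usage sketch (mirroring the calls used throughout this module):
#
#   self._ErrorIf(test, constants.CV_ENODERPC, node,
#                 "unable to verify node: no data returned")
#
# If the error's textual code is listed in the opcode's ignore_errors
# parameter, the entry is demoted to a warning above and does not set
# self.bad, so the operation as a whole is not marked as failed.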
2020 class LUClusterVerify(NoHooksLU):
2021 """Submits all jobs necessary to verify the cluster.
2026 def ExpandNames(self):
2027 self.needed_locks = {}
2029 def Exec(self, feedback_fn):
2032 if self.op.group_name:
2033 groups = [self.op.group_name]
2034 depends_fn = lambda: None
2036 groups = self.cfg.GetNodeGroupList()
2038 # Verify global configuration
2040 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2043 # Always depend on global verification
2044 depends_fn = lambda: [(-len(jobs), [])]
2047 [opcodes.OpClusterVerifyGroup(group_name=group,
2048 ignore_errors=self.op.ignore_errors,
2049 depends=depends_fn())]
2050 for group in groups)
2052 # Fix up all parameters
2053 for op in itertools.chain(*jobs): # pylint: disable=W0142
2054 op.debug_simulate_errors = self.op.debug_simulate_errors
2055 op.verbose = self.op.verbose
2056 op.error_codes = self.op.error_codes
2058 op.skip_checks = self.op.skip_checks
2059 except AttributeError:
2060 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2062 return ResultWithJobs(jobs)
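# The submitted work is therefore one job for the cluster-wide configuration
# check (unless a specific group was requested) followed by one
# single-opcode OpClusterVerifyGroup job per node group; depends_fn appears
# to express a relative dependency (negative offset) so every group job
# waits for the previously added global verification job.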
2065 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2066 """Verifies the cluster config.
2071 def _VerifyHVP(self, hvp_data):
2072 """Verifies locally the syntax of the hypervisor parameters.
2075 for item, hv_name, hv_params in hvp_data:
2076 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2079 hv_class = hypervisor.GetHypervisor(hv_name)
2080 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2081 hv_class.CheckParameterSyntax(hv_params)
2082 except errors.GenericError, err:
2083 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2085 def ExpandNames(self):
2086 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2087 self.share_locks = _ShareAll()
2089 def CheckPrereq(self):
2090 """Check prerequisites.
2093 # Retrieve all information
2094 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2095 self.all_node_info = self.cfg.GetAllNodesInfo()
2096 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2098 def Exec(self, feedback_fn):
2099 """Verify integrity of cluster, performing various tests on nodes.
2103 self._feedback_fn = feedback_fn
2105 feedback_fn("* Verifying cluster config")
2107 for msg in self.cfg.VerifyConfig():
2108 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2110 feedback_fn("* Verifying cluster certificate files")
2112 for cert_filename in pathutils.ALL_CERT_FILES:
2113 (errcode, msg) = _VerifyCertificate(cert_filename)
2114 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2116 feedback_fn("* Verifying hypervisor parameters")
2118 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2119 self.all_inst_info.values()))
2121 feedback_fn("* Verifying all nodes belong to an existing group")
2123 # We do this verification here because, should this bogus circumstance
2124 # occur, it would never be caught by VerifyGroup, which only acts on
2125 # nodes/instances reachable from existing node groups.
2127 dangling_nodes = set(node.name for node in self.all_node_info.values()
2128 if node.group not in self.all_group_info)
2130 dangling_instances = {}
2131 no_node_instances = []
2133 for inst in self.all_inst_info.values():
2134 if inst.primary_node in dangling_nodes:
2135 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2136 elif inst.primary_node not in self.all_node_info:
2137 no_node_instances.append(inst.name)
2142 utils.CommaJoin(dangling_instances.get(node.name,
2144 for node in dangling_nodes]
2146 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2148 "the following nodes (and their instances) belong to a non"
2149 " existing group: %s", utils.CommaJoin(pretty_dangling))
2151 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2153 "the following instances have a non-existing primary-node:"
2154 " %s", utils.CommaJoin(no_node_instances))
2159 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2160 """Verifies the status of a node group.
2163 HPATH = "cluster-verify"
2164 HTYPE = constants.HTYPE_CLUSTER
2167 _HOOKS_INDENT_RE = re.compile("^", re.M)
2169 class NodeImage(object):
2170 """A class representing the logical and physical status of a node.
2173 @ivar name: the node name to which this object refers
2174 @ivar volumes: a structure as returned from
2175 L{ganeti.backend.GetVolumeList} (runtime)
2176 @ivar instances: a list of running instances (runtime)
2177 @ivar pinst: list of configured primary instances (config)
2178 @ivar sinst: list of configured secondary instances (config)
2179 @ivar sbp: dictionary of {primary-node: list of instances} for all
2180 instances for which this node is secondary (config)
2181 @ivar mfree: free memory, as reported by hypervisor (runtime)
2182 @ivar dfree: free disk, as reported by the node (runtime)
2183 @ivar offline: the offline status (config)
2184 @type rpc_fail: boolean
2185 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2186 not whether the individual keys were correct) (runtime)
2187 @type lvm_fail: boolean
2188 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2189 @type hyp_fail: boolean
2190 @ivar hyp_fail: whether the RPC call didn't return the instance list
2191 @type ghost: boolean
2192 @ivar ghost: whether this is a known node or not (config)
2193 @type os_fail: boolean
2194 @ivar os_fail: whether the RPC call didn't return valid OS data
2196 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2197 @type vm_capable: boolean
2198 @ivar vm_capable: whether the node can host instances
2201 def __init__(self, offline=False, name=None, vm_capable=True):
2210 self.offline = offline
2211 self.vm_capable = vm_capable
2212 self.rpc_fail = False
2213 self.lvm_fail = False
2214 self.hyp_fail = False
2216 self.os_fail = False
2219 def ExpandNames(self):
2220 # This raises errors.OpPrereqError on its own:
2221 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2223 # Get instances in node group; this is unsafe and needs verification later
2225 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2227 self.needed_locks = {
2228 locking.LEVEL_INSTANCE: inst_names,
2229 locking.LEVEL_NODEGROUP: [self.group_uuid],
2230 locking.LEVEL_NODE: [],
2233 self.share_locks = _ShareAll()
2235 def DeclareLocks(self, level):
2236 if level == locking.LEVEL_NODE:
2237 # Get members of node group; this is unsafe and needs verification later
2238 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2240 all_inst_info = self.cfg.GetAllInstancesInfo()
2242 # In Exec(), we warn about mirrored instances that have primary and
2243 # secondary living in separate node groups. To fully verify that
2244 # volumes for these instances are healthy, we will need to do an
2245 # extra call to their secondaries. We ensure here those nodes will
2247 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2248 # Important: access only the instances whose lock is owned
2249 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2250 nodes.update(all_inst_info[inst].secondary_nodes)
2252 self.needed_locks[locking.LEVEL_NODE] = nodes
2254 def CheckPrereq(self):
2255 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2256 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2258 group_nodes = set(self.group_info.members)
2260 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2263 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2265 unlocked_instances = \
2266 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2269 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2270 utils.CommaJoin(unlocked_nodes),
2273 if unlocked_instances:
2274 raise errors.OpPrereqError("Missing lock for instances: %s" %
2275 utils.CommaJoin(unlocked_instances),
2278 self.all_node_info = self.cfg.GetAllNodesInfo()
2279 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2281 self.my_node_names = utils.NiceSort(group_nodes)
2282 self.my_inst_names = utils.NiceSort(group_instances)
2284 self.my_node_info = dict((name, self.all_node_info[name])
2285 for name in self.my_node_names)
2287 self.my_inst_info = dict((name, self.all_inst_info[name])
2288 for name in self.my_inst_names)
2290 # We detect here the nodes that will need the extra RPC calls for verifying
2291 # split LV volumes; they should be locked.
2292 extra_lv_nodes = set()
2294 for inst in self.my_inst_info.values():
2295 if inst.disk_template in constants.DTS_INT_MIRROR:
2296 for nname in inst.all_nodes:
2297 if self.all_node_info[nname].group != self.group_uuid:
2298 extra_lv_nodes.add(nname)
2300 unlocked_lv_nodes = \
2301 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2303 if unlocked_lv_nodes:
2304 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2305 utils.CommaJoin(unlocked_lv_nodes),
2307 self.extra_lv_nodes = list(extra_lv_nodes)
2309 def _VerifyNode(self, ninfo, nresult):
2310 """Perform some basic validation on data returned from a node.
2312 - check the result data structure is well formed and has all the
2314 - check ganeti version
2316 @type ninfo: L{objects.Node}
2317 @param ninfo: the node to check
2318 @param nresult: the results from the node
2320 @return: whether overall this call was successful (and we can expect
2321 reasonable values in the response)
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 # main result, nresult should be a non-empty dict
2328 test = not nresult or not isinstance(nresult, dict)
2329 _ErrorIf(test, constants.CV_ENODERPC, node,
2330 "unable to verify node: no data returned")
2334 # compares ganeti version
2335 local_version = constants.PROTOCOL_VERSION
2336 remote_version = nresult.get("version", None)
2337 test = not (remote_version and
2338 isinstance(remote_version, (list, tuple)) and
2339 len(remote_version) == 2)
2340 _ErrorIf(test, constants.CV_ENODERPC, node,
2341 "connection to node returned invalid data")
2345 test = local_version != remote_version[0]
2346 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2347 "incompatible protocol versions: master %s,"
2348 " node %s", local_version, remote_version[0])
2352 # node seems compatible, we can actually try to look into its results
2354 # full package version
2355 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2356 constants.CV_ENODEVERSION, node,
2357 "software version mismatch: master %s, node %s",
2358 constants.RELEASE_VERSION, remote_version[1],
2359 code=self.ETYPE_WARNING)
2361 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2362 if ninfo.vm_capable and isinstance(hyp_result, dict):
2363 for hv_name, hv_result in hyp_result.iteritems():
2364 test = hv_result is not None
2365 _ErrorIf(test, constants.CV_ENODEHV, node,
2366 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2368 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2369 if ninfo.vm_capable and isinstance(hvp_result, list):
2370 for item, hv_name, hv_result in hvp_result:
2371 _ErrorIf(True, constants.CV_ENODEHV, node,
2372 "hypervisor %s parameter verify failure (source %s): %s",
2373 hv_name, item, hv_result)
2375 test = nresult.get(constants.NV_NODESETUP,
2376 ["Missing NODESETUP results"])
2377 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2382 def _VerifyNodeTime(self, ninfo, nresult,
2383 nvinfo_starttime, nvinfo_endtime):
2384 """Check the node time.
2386 @type ninfo: L{objects.Node}
2387 @param ninfo: the node to check
2388 @param nresult: the remote results for the node
2389 @param nvinfo_starttime: the start time of the RPC call
2390 @param nvinfo_endtime: the end time of the RPC call
2394 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2396 ntime = nresult.get(constants.NV_TIME, None)
2398 ntime_merged = utils.MergeTime(ntime)
2399 except (ValueError, TypeError):
2400 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2403 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2404 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2405 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2406 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2410 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2411 "Node time diverges by at least %s from master node time",
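# Note: the comparison above is intentionally loose.  The node's reported
# time only has to fall inside the window
#   [nvinfo_starttime - NODE_MAX_CLOCK_SKEW, nvinfo_endtime + NODE_MAX_CLOCK_SKEW]
# so ordinary RPC latency does not trigger false positives; only drift
# beyond the allowed skew is reported as CV_ENODETIME.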
2414 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2415 """Check the node LVM results.
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param vg_name: the configured VG name
2427 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2429 # checks vg existence and size > 20G
2430 vglist = nresult.get(constants.NV_VGLIST, None)
2432 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2434 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2435 constants.MIN_VG_SIZE)
2436 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2439 pvlist = nresult.get(constants.NV_PVLIST, None)
2440 test = pvlist is None
2441 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2443 # check that ':' is not present in PV names, since it's a
2444 # special character for lvcreate (denotes the range of PEs to
2446 for _, pvname, owner_vg in pvlist:
2447 test = ":" in pvname
2448 _ErrorIf(test, constants.CV_ENODELVM, node,
2449 "Invalid character ':' in PV '%s' of VG '%s'",
2452 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2453 """Check the node bridges.
2455 @type ninfo: L{objects.Node}
2456 @param ninfo: the node to check
2457 @param nresult: the remote results for the node
2458 @param bridges: the expected list of bridges
2465 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2467 missing = nresult.get(constants.NV_BRIDGES, None)
2468 test = not isinstance(missing, list)
2469 _ErrorIf(test, constants.CV_ENODENET, node,
2470 "did not return valid bridge information")
2472 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2473 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2475 def _VerifyNodeUserScripts(self, ninfo, nresult):
2476 """Check the results of user script presence and executability on the node
2478 @type ninfo: L{objects.Node}
2479 @param ninfo: the node to check
2480 @param nresult: the remote results for the node
2485 test = not constants.NV_USERSCRIPTS in nresult
2486 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2487 "did not return user scripts information")
2489 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2491 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2492 "user scripts not present or not executable: %s" %
2493 utils.CommaJoin(sorted(broken_scripts)))
2495 def _VerifyNodeNetwork(self, ninfo, nresult):
2496 """Check the node network connectivity results.
2498 @type ninfo: L{objects.Node}
2499 @param ninfo: the node to check
2500 @param nresult: the remote results for the node
2504 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2506 test = constants.NV_NODELIST not in nresult
2507 _ErrorIf(test, constants.CV_ENODESSH, node,
2508 "node hasn't returned node ssh connectivity data")
2510 if nresult[constants.NV_NODELIST]:
2511 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2512 _ErrorIf(True, constants.CV_ENODESSH, node,
2513 "ssh communication with node '%s': %s", a_node, a_msg)
2515 test = constants.NV_NODENETTEST not in nresult
2516 _ErrorIf(test, constants.CV_ENODENET, node,
2517 "node hasn't returned node tcp connectivity data")
2519 if nresult[constants.NV_NODENETTEST]:
2520 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2522 _ErrorIf(True, constants.CV_ENODENET, node,
2523 "tcp communication with node '%s': %s",
2524 anode, nresult[constants.NV_NODENETTEST][anode])
2526 test = constants.NV_MASTERIP not in nresult
2527 _ErrorIf(test, constants.CV_ENODENET, node,
2528 "node hasn't returned node master IP reachability data")
2530 if not nresult[constants.NV_MASTERIP]:
2531 if node == self.master_node:
2532 msg = "the master node cannot reach the master IP (not configured?)"
2534 msg = "cannot reach the master IP"
2535 _ErrorIf(True, constants.CV_ENODENET, node, msg)
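# Summary of the connectivity checks above: SSH reachability of the
# selected peer nodes (NV_NODELIST), TCP reachability of other nodes'
# primary/secondary IPs (NV_NODENETTEST), and whether this node can reach
# the cluster's master IP (NV_MASTERIP), with a special-cased message when
# the failing node is the master itself.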
2537 def _VerifyInstance(self, instance, instanceconfig, node_image,
2539 """Verify an instance.
2541 This function checks to see if the required block devices are
2542 available on the instance's node.
2545 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2546 node_current = instanceconfig.primary_node
2548 node_vol_should = {}
2549 instanceconfig.MapLVsByNode(node_vol_should)
2551 cluster = self.cfg.GetClusterInfo()
2552 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2554 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2555 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2556 code=self.ETYPE_WARNING)
2558 for node in node_vol_should:
2559 n_img = node_image[node]
2560 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2561 # ignore missing volumes on offline or broken nodes
2563 for volume in node_vol_should[node]:
2564 test = volume not in n_img.volumes
2565 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2566 "volume %s missing on node %s", volume, node)
2568 if instanceconfig.admin_state == constants.ADMINST_UP:
2569 pri_img = node_image[node_current]
2570 test = instance not in pri_img.instances and not pri_img.offline
2571 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2572 "instance not running on its primary node %s",
2575 diskdata = [(nname, success, status, idx)
2576 for (nname, disks) in diskstatus.items()
2577 for idx, (success, status) in enumerate(disks)]
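# Illustrative flattening performed above (node and status values made up):
#   {"node1": [(True, status0), (False, "read error")]}
# becomes
#   [("node1", True, status0, 0), ("node1", False, "read error", 1)]
# i.e. one tuple per disk with its index preserved.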
2579 for nname, success, bdev_status, idx in diskdata:
2580 # the 'ghost node' construction in Exec() ensures that we have a
2582 snode = node_image[nname]
2583 bad_snode = snode.ghost or snode.offline
2584 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2585 not success and not bad_snode,
2586 constants.CV_EINSTANCEFAULTYDISK, instance,
2587 "couldn't retrieve status for disk/%s on %s: %s",
2588 idx, nname, bdev_status)
2589 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2590 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2591 constants.CV_EINSTANCEFAULTYDISK, instance,
2592 "disk/%s on %s is faulty", idx, nname)
2594 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2595 """Verify if there are any unknown volumes in the cluster.
2597 The .os, .swap and backup volumes are ignored. All other volumes are
2598 reported as unknown.
2600 @type reserved: L{ganeti.utils.FieldSet}
2601 @param reserved: a FieldSet of reserved volume names
2604 for node, n_img in node_image.items():
2605 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2606 self.all_node_info[node].group != self.group_uuid):
2607 # skip non-healthy nodes
2609 for volume in n_img.volumes:
2610 test = ((node not in node_vol_should or
2611 volume not in node_vol_should[node]) and
2612 not reserved.Matches(volume))
2613 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2614 "volume %s is unknown", volume)
2616 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2617 """Verify N+1 Memory Resilience.
2619 Check that if one single node dies we can still start all the
2620 instances it was primary for.
2623 cluster_info = self.cfg.GetClusterInfo()
2624 for node, n_img in node_image.items():
2625 # This code checks that every node which is now listed as
2626 # secondary has enough memory to host all instances it is
2627 # supposed to, should a single other node in the cluster fail.
2628 # FIXME: not ready for failover to an arbitrary node
2629 # FIXME: does not support file-backed instances
2630 # WARNING: we currently take into account down instances as well
2631 # as up ones, considering that even if they're down someone
2632 # might want to start them even in the event of a node failure.
2633 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2634 # we're skipping nodes marked offline and nodes in other groups from
2635 # the N+1 warning, since most likely we don't have good memory
2636 # information from them; we already list instances living on such
2637 # nodes, and that's enough warning
2639 #TODO(dynmem): also consider ballooning out other instances
2640 for prinode, instances in n_img.sbp.items():
2642 for instance in instances:
2643 bep = cluster_info.FillBE(instance_cfg[instance])
2644 if bep[constants.BE_AUTO_BALANCE]:
2645 needed_mem += bep[constants.BE_MINMEM]
2646 test = n_img.mfree < needed_mem
2647 self._ErrorIf(test, constants.CV_ENODEN1, node,
2648 "not enough memory to accommodate instance failovers"
2649 " should node %s fail (%dMiB needed, %dMiB available)",
2650 prinode, needed_mem, n_img.mfree)
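# Worked example with illustrative numbers: if this node is secondary for
# two auto-balanced instances whose primary is node A, with BE_MINMEM of
# 1024 and 2048 MiB, then needed_mem for the "A fails over to us" case is
# 3072 MiB; if the node reports mfree below that, CV_ENODEN1 is raised.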
2653 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2654 (files_all, files_opt, files_mc, files_vm)):
2655 """Verifies file checksums collected from all nodes.
2657 @param errorif: Callback for reporting errors
2658 @param nodeinfo: List of L{objects.Node} objects
2659 @param master_node: Name of master node
2660 @param all_nvinfo: RPC results
2663 # Define functions determining which nodes to consider for a file
2666 (files_mc, lambda node: (node.master_candidate or
2667 node.name == master_node)),
2668 (files_vm, lambda node: node.vm_capable),
2671 # Build mapping from filename to list of nodes which should have the file
2673 for (files, fn) in files2nodefn:
2675 filenodes = nodeinfo
2677 filenodes = filter(fn, nodeinfo)
2678 nodefiles.update((filename,
2679 frozenset(map(operator.attrgetter("name"), filenodes)))
2680 for filename in files)
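# Illustrative result of the mapping built above (file names shortened):
#   nodefiles == {"<cluster config file>": frozenset(["node1", "node2"]),
#                 "<candidate-only file>": frozenset(["node1"]), ...}
# i.e. every expected file mapped to the set of node names that should
# carry it, depending on master-candidate/vm_capable status.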
2682 assert set(nodefiles) == (files_all | files_mc | files_vm)
2684 fileinfo = dict((filename, {}) for filename in nodefiles)
2685 ignore_nodes = set()
2687 for node in nodeinfo:
2689 ignore_nodes.add(node.name)
2692 nresult = all_nvinfo[node.name]
2694 if nresult.fail_msg or not nresult.payload:
2697 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2698 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2699 for (key, value) in fingerprints.items())
2702 test = not (node_files and isinstance(node_files, dict))
2703 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2704 "Node did not return file checksum data")
2706 ignore_nodes.add(node.name)
2709 # Build per-checksum mapping from filename to nodes having it
2710 for (filename, checksum) in node_files.items():
2711 assert filename in nodefiles
2712 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2714 for (filename, checksums) in fileinfo.items():
2715 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2717 # Nodes having the file
2718 with_file = frozenset(node_name
2719 for nodes in fileinfo[filename].values()
2720 for node_name in nodes) - ignore_nodes
2722 expected_nodes = nodefiles[filename] - ignore_nodes
2724 # Nodes missing file
2725 missing_file = expected_nodes - with_file
2727 if filename in files_opt:
2729 errorif(missing_file and missing_file != expected_nodes,
2730 constants.CV_ECLUSTERFILECHECK, None,
2731 "File %s is optional, but it must exist on all or no"
2732 " nodes (not found on %s)",
2733 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2735 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2736 "File %s is missing from node(s) %s", filename,
2737 utils.CommaJoin(utils.NiceSort(missing_file)))
2739 # Warn if a node has a file it shouldn't
2740 unexpected = with_file - expected_nodes
2742 constants.CV_ECLUSTERFILECHECK, None,
2743 "File %s should not exist on node(s) %s",
2744 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2746 # See if there are multiple versions of the file
2747 test = len(checksums) > 1
2749 variants = ["variant %s on %s" %
2750 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2751 for (idx, (checksum, nodes)) in
2752 enumerate(sorted(checksums.items()))]
2756 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2757 "File %s found with %s different checksums (%s)",
2758 filename, len(checksums), "; ".join(variants))
2760 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2762 """Verifies the node DRBD status.
2764 @type ninfo: L{objects.Node}
2765 @param ninfo: the node to check
2766 @param nresult: the remote results for the node
2767 @param instanceinfo: the dict of instances
2768 @param drbd_helper: the configured DRBD usermode helper
2769 @param drbd_map: the DRBD map as returned by
2770 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2774 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2777 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2778 test = (helper_result is None)
2779 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2780 "no drbd usermode helper returned")
2782 status, payload = helper_result
2784 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2785 "drbd usermode helper check unsuccessful: %s", payload)
2786 test = status and (payload != drbd_helper)
2787 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2788 "wrong drbd usermode helper: %s", payload)
2790 # compute the DRBD minors
2792 for minor, instance in drbd_map[node].items():
2793 test = instance not in instanceinfo
2794 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2795 "ghost instance '%s' in temporary DRBD map", instance)
2796 # ghost instance should not be running, but otherwise we
2797 # don't give double warnings (both ghost instance and
2798 # unallocated minor in use)
2800 node_drbd[minor] = (instance, False)
2802 instance = instanceinfo[instance]
2803 node_drbd[minor] = (instance.name,
2804 instance.admin_state == constants.ADMINST_UP)
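# Illustrative: node_drbd maps every DRBD minor configured for this node to
# (instance_name, should_be_active).  Minors pointing at unknown ("ghost")
# instances are recorded with should_be_active=False so that only one error
# is emitted for them, as explained in the comment above.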
2806 # and now check them
2807 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2808 test = not isinstance(used_minors, (tuple, list))
2809 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2810 "cannot parse drbd status file: %s", str(used_minors))
2812 # we cannot check drbd status
2815 for minor, (iname, must_exist) in node_drbd.items():
2816 test = minor not in used_minors and must_exist
2817 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2818 "drbd minor %d of instance %s is not active", minor, iname)
2819 for minor in used_minors:
2820 test = minor not in node_drbd
2821 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2822 "unallocated drbd minor %d is in use", minor)
2824 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2825 """Builds the node OS structures.
2827 @type ninfo: L{objects.Node}
2828 @param ninfo: the node to check
2829 @param nresult: the remote results for the node
2830 @param nimg: the node image object
2834 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2836 remote_os = nresult.get(constants.NV_OSLIST, None)
2837 test = (not isinstance(remote_os, list) or
2838 not compat.all(isinstance(v, list) and len(v) == 7
2839 for v in remote_os))
2841 _ErrorIf(test, constants.CV_ENODEOS, node,
2842 "node hasn't returned valid OS data")
2851 for (name, os_path, status, diagnose,
2852 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2854 if name not in os_dict:
2857 # parameters is a list of lists instead of list of tuples due to
2858 # JSON lacking a real tuple type, fix it:
2859 parameters = [tuple(v) for v in parameters]
2860 os_dict[name].append((os_path, status, diagnose,
2861 set(variants), set(parameters), set(api_ver)))
2863 nimg.oslist = os_dict
2865 def _VerifyNodeOS(self, ninfo, nimg, base):
2866 """Verifies the node OS list.
2868 @type ninfo: L{objects.Node}
2869 @param ninfo: the node to check
2870 @param nimg: the node image object
2871 @param base: the 'template' node we match against (e.g. from the master)
2875 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2877 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2879 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2880 for os_name, os_data in nimg.oslist.items():
2881 assert os_data, "Empty OS status for OS %s?!" % os_name
2882 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2883 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2884 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2885 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2886 "OS '%s' has multiple entries (first one shadows the rest): %s",
2887 os_name, utils.CommaJoin([v[0] for v in os_data]))
2888 # comparisons with the 'base' image
2889 test = os_name not in base.oslist
2890 _ErrorIf(test, constants.CV_ENODEOS, node,
2891 "Extra OS %s not present on reference node (%s)",
2895 assert base.oslist[os_name], "Base node has empty OS status?"
2896 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2898 # base OS is invalid, skipping
2900 for kind, a, b in [("API version", f_api, b_api),
2901 ("variants list", f_var, b_var),
2902 ("parameters", beautify_params(f_param),
2903 beautify_params(b_param))]:
2904 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2905 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2906 kind, os_name, base.name,
2907 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2909 # check any missing OSes
2910 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2911 _ErrorIf(missing, constants.CV_ENODEOS, node,
2912 "OSes present on reference node %s but missing on this node: %s",
2913 base.name, utils.CommaJoin(missing))
2915 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2916 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2918 @type ninfo: L{objects.Node}
2919 @param ninfo: the node to check
2920 @param nresult: the remote results for the node
2921 @type is_master: bool
2922 @param is_master: Whether node is the master node
2928 (constants.ENABLE_FILE_STORAGE or
2929 constants.ENABLE_SHARED_FILE_STORAGE)):
2931 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2933 # This should never happen
2934 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2935 "Node did not return forbidden file storage paths")
2937 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2938 "Found forbidden file storage paths: %s",
2939 utils.CommaJoin(fspaths))
2941 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2942 constants.CV_ENODEFILESTORAGEPATHS, node,
2943 "Node should not have returned forbidden file storage"
2946 def _VerifyOob(self, ninfo, nresult):
2947 """Verifies out of band functionality of a node.
2949 @type ninfo: L{objects.Node}
2950 @param ninfo: the node to check
2951 @param nresult: the remote results for the node
2955 # We just have to verify the paths on master and/or master candidates
2956 # as the oob helper is invoked on the master
2957 if ((ninfo.master_candidate or ninfo.master_capable) and
2958 constants.NV_OOB_PATHS in nresult):
2959 for path_result in nresult[constants.NV_OOB_PATHS]:
2960 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2962 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2963 """Verifies and updates the node volume data.
2965 This function will update a L{NodeImage}'s internal structures
2966 with data from the remote call.
2968 @type ninfo: L{objects.Node}
2969 @param ninfo: the node to check
2970 @param nresult: the remote results for the node
2971 @param nimg: the node image object
2972 @param vg_name: the configured VG name
2976 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2978 nimg.lvm_fail = True
2979 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2982 elif isinstance(lvdata, basestring):
2983 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2984 utils.SafeEncode(lvdata))
2985 elif not isinstance(lvdata, dict):
2986 _ErrorIf(True, constants.CV_ENODELVM, node,
2987 "rpc call to node failed (lvlist)")
2989 nimg.volumes = lvdata
2990 nimg.lvm_fail = False
2992 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2993 """Verifies and updates the node instance list.
2995 If the listing was successful, then updates this node's instance
2996 list. Otherwise, it marks the RPC call as failed for the instance
2999 @type ninfo: L{objects.Node}
3000 @param ninfo: the node to check
3001 @param nresult: the remote results for the node
3002 @param nimg: the node image object
3005 idata = nresult.get(constants.NV_INSTANCELIST, None)
3006 test = not isinstance(idata, list)
3007 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3008 "rpc call to node failed (instancelist): %s",
3009 utils.SafeEncode(str(idata)))
3011 nimg.hyp_fail = True
3013 nimg.instances = idata
3015 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3016 """Verifies and computes a node information map
3018 @type ninfo: L{objects.Node}
3019 @param ninfo: the node to check
3020 @param nresult: the remote results for the node
3021 @param nimg: the node image object
3022 @param vg_name: the configured VG name
3026 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3028 # try to read free memory (from the hypervisor)
3029 hv_info = nresult.get(constants.NV_HVINFO, None)
3030 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3031 _ErrorIf(test, constants.CV_ENODEHV, node,
3032 "rpc call to node failed (hvinfo)")
3035 nimg.mfree = int(hv_info["memory_free"])
3036 except (ValueError, TypeError):
3037 _ErrorIf(True, constants.CV_ENODERPC, node,
3038 "node returned invalid nodeinfo, check hypervisor")
3040 # FIXME: devise a free space model for file based instances as well
3041 if vg_name is not None:
3042 test = (constants.NV_VGLIST not in nresult or
3043 vg_name not in nresult[constants.NV_VGLIST])
3044 _ErrorIf(test, constants.CV_ENODELVM, node,
3045 "node didn't return data for the volume group '%s'"
3046 " - it is either missing or broken", vg_name)
3049 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3050 except (ValueError, TypeError):
3051 _ErrorIf(True, constants.CV_ENODERPC, node,
3052 "node returned invalid LVM info, check LVM status")
3054 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3055 """Gets per-disk status information for all instances.
3057 @type nodelist: list of strings
3058 @param nodelist: Node names
3059 @type node_image: dict of (name, L{NodeImage})
3060 @param node_image: Node objects
3061 @type instanceinfo: dict of (name, L{objects.Instance})
3062 @param instanceinfo: Instance objects
3063 @rtype: {instance: {node: [(success, payload)]}}
3064 @return: a dictionary of per-instance dictionaries with nodes as
3065 keys and disk information as values; the disk information is a
3066 list of tuples (success, payload)
3069 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3072 node_disks_devonly = {}
3073 diskless_instances = set()
3074 diskless = constants.DT_DISKLESS
3076 for nname in nodelist:
3077 node_instances = list(itertools.chain(node_image[nname].pinst,
3078 node_image[nname].sinst))
3079 diskless_instances.update(inst for inst in node_instances
3080 if instanceinfo[inst].disk_template == diskless)
3081 disks = [(inst, disk)
3082 for inst in node_instances
3083 for disk in instanceinfo[inst].disks]
3086 # No need to collect data
3089 node_disks[nname] = disks
3091 # _AnnotateDiskParams makes already copies of the disks
3093 for (inst, dev) in disks:
3094 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3095 self.cfg.SetDiskID(anno_disk, nname)
3096 devonly.append(anno_disk)
3098 node_disks_devonly[nname] = devonly
3100 assert len(node_disks) == len(node_disks_devonly)
3102 # Collect data from all nodes with disks
3103 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3106 assert len(result) == len(node_disks)
3110 for (nname, nres) in result.items():
3111 disks = node_disks[nname]
3114 # No data from this node
3115 data = len(disks) * [(False, "node offline")]
3118 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3119 "while getting disk information: %s", msg)
3121 # No data from this node
3122 data = len(disks) * [(False, msg)]
3125 for idx, i in enumerate(nres.payload):
3126 if isinstance(i, (tuple, list)) and len(i) == 2:
3129 logging.warning("Invalid result from node %s, entry %d: %s",
3131 data.append((False, "Invalid result from the remote node"))
3133 for ((inst, _), status) in zip(disks, data):
3134 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3136 # Add empty entries for diskless instances.
3137 for inst in diskless_instances:
3138 assert inst not in instdisk
3141 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3142 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3143 compat.all(isinstance(s, (tuple, list)) and
3144 len(s) == 2 for s in statuses)
3145 for inst, nnames in instdisk.items()
3146 for nname, statuses in nnames.items())
3147 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
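# Illustrative instdisk layout, following the @rtype documented above
# (instance, node and status names are made up):
#   {"web1": {"node1": [(True, status_disk0), (True, status_disk1)],
#             "node2": [(True, status_disk0), (True, status_disk1)]}}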
3152 def _SshNodeSelector(group_uuid, all_nodes):
3153 """Create endless iterators for all potential SSH check hosts.
3156 nodes = [node for node in all_nodes
3157 if (node.group != group_uuid and
3159 keyfunc = operator.attrgetter("group")
3161 return map(itertools.cycle,
3162 [sorted(map(operator.attrgetter("name"), names))
3163 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3167 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3168 """Choose which nodes should talk to which other nodes.
3170 We will make nodes contact all nodes in their group, and one node from
3173 @warning: This algorithm has a known issue if one node group is much
3174 smaller than others (e.g. just one node). In such a case all other
3175 nodes will talk to the single node.
3178 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3179 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3181 return (online_nodes,
3182 dict((name, sorted([i.next() for i in sel]))
3183 for name in online_nodes))
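# Illustrative example: when verifying group G1 = {a, b} in a cluster that
# also has group G2 = {c, d}, this returns something like
#   (["a", "b"], {"a": ["c"], "b": ["d"]})
# i.e. each online node of the verified group is assigned one node from
# every other group, cycling through that group's members.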
3185 def BuildHooksEnv(self):
3188 Cluster-Verify hooks are only run in the post phase; if they fail, their
3189 output is logged in the verify output and the verification fails.
3193 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3196 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3197 for node in self.my_node_info.values())
3201 def BuildHooksNodes(self):
3202 """Build hooks nodes.
3205 return ([], self.my_node_names)
3207 def Exec(self, feedback_fn):
3208 """Verify integrity of the node group, performing various tests on nodes.
3211 # This method has too many local variables. pylint: disable=R0914
3212 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3214 if not self.my_node_names:
3216 feedback_fn("* Empty node group, skipping verification")
3220 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3221 verbose = self.op.verbose
3222 self._feedback_fn = feedback_fn
3224 vg_name = self.cfg.GetVGName()
3225 drbd_helper = self.cfg.GetDRBDHelper()
3226 cluster = self.cfg.GetClusterInfo()
3227 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3228 hypervisors = cluster.enabled_hypervisors
3229 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3231 i_non_redundant = [] # Non redundant instances
3232 i_non_a_balanced = [] # Non auto-balanced instances
3233 i_offline = 0 # Count of offline instances
3234 n_offline = 0 # Count of offline nodes
3235 n_drained = 0 # Count of nodes being drained
3236 node_vol_should = {}
3238 # FIXME: verify OS list
3241 filemap = _ComputeAncillaryFiles(cluster, False)
3243 # do local checksums
3244 master_node = self.master_node = self.cfg.GetMasterNode()
3245 master_ip = self.cfg.GetMasterIP()
3247 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3250 if self.cfg.GetUseExternalMipScript():
3251 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3253 node_verify_param = {
3254 constants.NV_FILELIST:
3255 map(vcluster.MakeVirtualPath,
3256 utils.UniqueSequence(filename
3257 for files in filemap
3258 for filename in files)),
3259 constants.NV_NODELIST:
3260 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3261 self.all_node_info.values()),
3262 constants.NV_HYPERVISOR: hypervisors,
3263 constants.NV_HVPARAMS:
3264 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3265 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3266 for node in node_data_list
3267 if not node.offline],
3268 constants.NV_INSTANCELIST: hypervisors,
3269 constants.NV_VERSION: None,
3270 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3271 constants.NV_NODESETUP: None,
3272 constants.NV_TIME: None,
3273 constants.NV_MASTERIP: (master_node, master_ip),
3274 constants.NV_OSLIST: None,
3275 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3276 constants.NV_USERSCRIPTS: user_scripts,
3279 if vg_name is not None:
3280 node_verify_param[constants.NV_VGLIST] = None
3281 node_verify_param[constants.NV_LVLIST] = vg_name
3282 node_verify_param[constants.NV_PVLIST] = [vg_name]
3285 node_verify_param[constants.NV_DRBDLIST] = None
3286 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3288 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3289 # Load file storage paths only from master node
3290 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3293 # FIXME: this needs to be changed per node-group, not cluster-wide
3295 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3296 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3297 bridges.add(default_nicpp[constants.NIC_LINK])
3298 for instance in self.my_inst_info.values():
3299 for nic in instance.nics:
3300 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3301 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3302 bridges.add(full_nic[constants.NIC_LINK])
3305 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3307 # Build our expected cluster state
3308 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3310 vm_capable=node.vm_capable))
3311 for node in node_data_list)
3315 for node in self.all_node_info.values():
3316 path = _SupportsOob(self.cfg, node)
3317 if path and path not in oob_paths:
3318 oob_paths.append(path)
3321 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3323 for instance in self.my_inst_names:
3324 inst_config = self.my_inst_info[instance]
3325 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3328 for nname in inst_config.all_nodes:
3329 if nname not in node_image:
3330 gnode = self.NodeImage(name=nname)
3331 gnode.ghost = (nname not in self.all_node_info)
3332 node_image[nname] = gnode
3334 inst_config.MapLVsByNode(node_vol_should)
3336 pnode = inst_config.primary_node
3337 node_image[pnode].pinst.append(instance)
3339 for snode in inst_config.secondary_nodes:
3340 nimg = node_image[snode]
3341 nimg.sinst.append(instance)
3342 if pnode not in nimg.sbp:
3343 nimg.sbp[pnode] = []
3344 nimg.sbp[pnode].append(instance)
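# After this loop node_image describes the *expected* state: pinst/sinst
# hold the configured primary/secondary instances of each node, and sbp
# maps a primary node to the instances for which this node is secondary,
# e.g. node_image["node2"].sbp == {"node1": ["web1"]} (names illustrative).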
3346 # At this point, we have the in-memory data structures complete,
3347 # except for the runtime information, which we'll gather next
3349 # Due to the way our RPC system works, exact response times cannot be
3350 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3351 # time before and after executing the request, we can at least have a time
3353 nvinfo_starttime = time.time()
3354 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3356 self.cfg.GetClusterName())
3357 nvinfo_endtime = time.time()
3359 if self.extra_lv_nodes and vg_name is not None:
3361 self.rpc.call_node_verify(self.extra_lv_nodes,
3362 {constants.NV_LVLIST: vg_name},
3363 self.cfg.GetClusterName())
3365 extra_lv_nvinfo = {}
3367 all_drbd_map = self.cfg.ComputeDRBDMap()
3369 feedback_fn("* Gathering disk information (%s nodes)" %
3370 len(self.my_node_names))
3371 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3374 feedback_fn("* Verifying configuration file consistency")
3376 # If not all nodes are being checked, we need to make sure the master node
3377 # and a non-checked vm_capable node are in the list.
3378 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3380 vf_nvinfo = all_nvinfo.copy()
3381 vf_node_info = list(self.my_node_info.values())
3382 additional_nodes = []
3383 if master_node not in self.my_node_info:
3384 additional_nodes.append(master_node)
3385 vf_node_info.append(self.all_node_info[master_node])
3386 # Add the first vm_capable node we find which is not included,
3387 # excluding the master node (which we already have)
3388 for node in absent_nodes:
3389 nodeinfo = self.all_node_info[node]
3390 if (nodeinfo.vm_capable and not nodeinfo.offline and
3391 node != master_node):
3392 additional_nodes.append(node)
3393 vf_node_info.append(self.all_node_info[node])
3395 key = constants.NV_FILELIST
3396 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3397 {key: node_verify_param[key]},
3398 self.cfg.GetClusterName()))
3400 vf_nvinfo = all_nvinfo
3401 vf_node_info = self.my_node_info.values()
3403 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3405 feedback_fn("* Verifying node status")
3409 for node_i in node_data_list:
3411 nimg = node_image[node]
3415 feedback_fn("* Skipping offline node %s" % (node,))
3419 if node == master_node:
3421 elif node_i.master_candidate:
3422 ntype = "master candidate"
3423 elif node_i.drained:
3429 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3431 msg = all_nvinfo[node].fail_msg
3432 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3435 nimg.rpc_fail = True
3438 nresult = all_nvinfo[node].payload
3440 nimg.call_ok = self._VerifyNode(node_i, nresult)
3441 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3442 self._VerifyNodeNetwork(node_i, nresult)
3443 self._VerifyNodeUserScripts(node_i, nresult)
3444 self._VerifyOob(node_i, nresult)
3445 self._VerifyFileStoragePaths(node_i, nresult,
3446 node == master_node)
3449 self._VerifyNodeLVM(node_i, nresult, vg_name)
3450 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3453 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3454 self._UpdateNodeInstances(node_i, nresult, nimg)
3455 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3456 self._UpdateNodeOS(node_i, nresult, nimg)
3458 if not nimg.os_fail:
3459 if refos_img is None:
3461 self._VerifyNodeOS(node_i, nimg, refos_img)
3462 self._VerifyNodeBridges(node_i, nresult, bridges)
3464 # Check whether all running instances are primary for the node. (This
3465 # can no longer be done from _VerifyInstance below, since some of the
3466 # wrong instances could be from other node groups.)
3467 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3469 for inst in non_primary_inst:
3470 test = inst in self.all_inst_info
3471 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3472 "instance should not run on node %s", node_i.name)
3473 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3474 "node is running unknown instance %s", inst)
3476 for node, result in extra_lv_nvinfo.items():
3477 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3478 node_image[node], vg_name)
3480 feedback_fn("* Verifying instance status")
3481 for instance in self.my_inst_names:
3483 feedback_fn("* Verifying instance %s" % instance)
3484 inst_config = self.my_inst_info[instance]
3485 self._VerifyInstance(instance, inst_config, node_image,
3487 inst_nodes_offline = []
3489 pnode = inst_config.primary_node
3490 pnode_img = node_image[pnode]
3491 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3492 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3493 " primary node failed", instance)
3495 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3497 constants.CV_EINSTANCEBADNODE, instance,
3498 "instance is marked as running and lives on offline node %s",
3499 inst_config.primary_node)
3501 # If the instance is non-redundant we cannot survive losing its primary
3502 # node, so we are not N+1 compliant.
3503 if inst_config.disk_template not in constants.DTS_MIRRORED:
3504 i_non_redundant.append(instance)
3506 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3507 constants.CV_EINSTANCELAYOUT,
3508 instance, "instance has multiple secondary nodes: %s",
3509 utils.CommaJoin(inst_config.secondary_nodes),
3510 code=self.ETYPE_WARNING)
3512 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3513 pnode = inst_config.primary_node
3514 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3515 instance_groups = {}
3517 for node in instance_nodes:
3518 instance_groups.setdefault(self.all_node_info[node].group,
3522 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3523 # Sort so that we always list the primary node first.
3524 for group, nodes in sorted(instance_groups.items(),
3525 key=lambda (_, nodes): pnode in nodes,
3528 self._ErrorIf(len(instance_groups) > 1,
3529 constants.CV_EINSTANCESPLITGROUPS,
3530 instance, "instance has primary and secondary nodes in"
3531 " different groups: %s", utils.CommaJoin(pretty_list),
3532 code=self.ETYPE_WARNING)
3534 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3535 i_non_a_balanced.append(instance)
3537 for snode in inst_config.secondary_nodes:
3538 s_img = node_image[snode]
3539 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3540 snode, "instance %s, connection to secondary node failed",
3544 inst_nodes_offline.append(snode)
3546 # warn that the instance lives on offline nodes
3547 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3548 "instance has offline secondary node(s) %s",
3549 utils.CommaJoin(inst_nodes_offline))
3550 # ... or ghost/non-vm_capable nodes
3551 for node in inst_config.all_nodes:
3552 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3553 instance, "instance lives on ghost node %s", node)
3554 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3555 instance, "instance lives on non-vm_capable node %s", node)
3557 feedback_fn("* Verifying orphan volumes")
3558 reserved = utils.FieldSet(*cluster.reserved_lvs)
3560 # We will get spurious "unknown volume" warnings if any node of this group
3561 # is secondary for an instance whose primary is in another group. To avoid
3562 # them, we find these instances and add their volumes to node_vol_should.
3563 for inst in self.all_inst_info.values():
3564 for secondary in inst.secondary_nodes:
3565 if (secondary in self.my_node_info
3566 and inst.name not in self.my_inst_info):
3567 inst.MapLVsByNode(node_vol_should)
3570 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3572 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3573 feedback_fn("* Verifying N+1 Memory redundancy")
3574 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3576 feedback_fn("* Other Notes")
3578 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3579 % len(i_non_redundant))
3581 if i_non_a_balanced:
3582 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3583 % len(i_non_a_balanced))
3586 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3589 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3592 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3596 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3597 """Analyze the post-hooks' result
3599 This method analyses the hook result, handles it, and sends some
3600 nicely-formatted feedback back to the user.
3602 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3603 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3604 @param hooks_results: the results of the multi-node hooks rpc call
3605 @param feedback_fn: function used to send feedback back to the caller
3606 @param lu_result: previous Exec result
3607 @return: the new Exec result, based on the previous result
3611 # We only really run POST phase hooks, only for non-empty groups,
3612 # and are only interested in their results
3613 if not self.my_node_names:
3616 elif phase == constants.HOOKS_PHASE_POST:
3617 # Used to change hooks' output to proper indentation
3618 feedback_fn("* Hooks Results")
3619 assert hooks_results, "invalid result from hooks"
3621 for node_name in hooks_results:
3622 res = hooks_results[node_name]
3624 test = msg and not res.offline
3625 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3626 "Communication failure in hooks execution: %s", msg)
3627 if res.offline or msg:
3628 # No need to investigate payload if node is offline or gave
3631 for script, hkr, output in res.payload:
3632 test = hkr == constants.HKR_FAIL
3633 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3634 "Script %s failed, output:", script)
3636 output = self._HOOKS_INDENT_RE.sub(" ", output)
3637 feedback_fn("%s" % output)
3643 class LUClusterVerifyDisks(NoHooksLU):
3644 """Verifies the cluster disks status.
3649 def ExpandNames(self):
3650 self.share_locks = _ShareAll()
3651 self.needed_locks = {
3652 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3655 def Exec(self, feedback_fn):
3656 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3658 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3659 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3660 for group in group_names])
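# Illustrative result: a cluster with node groups "default" and "storage"
# yields two single-opcode jobs,
#   [[opcodes.OpGroupVerifyDisks(group_name="default")],
#    [opcodes.OpGroupVerifyDisks(group_name="storage")]]
# wrapped in a ResultWithJobs instance.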
3663 class LUGroupVerifyDisks(NoHooksLU):
3664 """Verifies the status of all disks in a node group.
3669 def ExpandNames(self):
3670 # Raises errors.OpPrereqError on its own if group can't be found
3671 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3673 self.share_locks = _ShareAll()
3674 self.needed_locks = {
3675 locking.LEVEL_INSTANCE: [],
3676 locking.LEVEL_NODEGROUP: [],
3677 locking.LEVEL_NODE: [],
3680 def DeclareLocks(self, level):
3681 if level == locking.LEVEL_INSTANCE:
3682 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3684 # Lock instances optimistically, needs verification once node and group
3685 # locks have been acquired
3686 self.needed_locks[locking.LEVEL_INSTANCE] = \
3687 self.cfg.GetNodeGroupInstances(self.group_uuid)
3689 elif level == locking.LEVEL_NODEGROUP:
3690 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3692 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3693 set([self.group_uuid] +
3694 # Lock all groups used by instances optimistically; this requires
3695 # going via the node before it's locked, requiring verification
3698 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3699 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3701 elif level == locking.LEVEL_NODE:
3702 # This will only lock the nodes in the group to be verified which contain
3704 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3705 self._LockInstancesNodes()
3707 # Lock all nodes in group to be verified
3708 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3709 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3710 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3712 def CheckPrereq(self):
3713 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3714 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3715 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3717 assert self.group_uuid in owned_groups
3719 # Check if locked instances are still correct
3720 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3722 # Get instance information
3723 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3725 # Check if node groups for locked instances are still correct
3726 _CheckInstancesNodeGroups(self.cfg, self.instances,
3727 owned_groups, owned_nodes, self.group_uuid)
3729 def Exec(self, feedback_fn):
3730 """Verify integrity of cluster disks.
3732 @rtype: tuple of three items
3733 @return: a tuple of (dict of node-to-node_error, list of instances
3734 which need activate-disks, dict of instance: (node, volume) for
3739 res_instances = set()
3742 nv_dict = _MapInstanceDisksToNodes(
3743 [inst for inst in self.instances.values()
3744 if inst.admin_state == constants.ADMINST_UP])
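# nv_dict maps (node_name, lv_name) pairs to the owning instance for every
# volume of an instance that is administratively up; entries that survive
# the per-node LV listing below are reported as missing volumes.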
3747 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3748 set(self.cfg.GetVmCapableNodeList()))
3750 node_lvs = self.rpc.call_lv_list(nodes, [])
3752 for (node, node_res) in node_lvs.items():
3753 if node_res.offline:
3756 msg = node_res.fail_msg
3758 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3759 res_nodes[node] = msg
3762 for lv_name, (_, _, lv_online) in node_res.payload.items():
3763 inst = nv_dict.pop((node, lv_name), None)
3764 if not (lv_online or inst is None):
3765 res_instances.add(inst)
3767 # any leftover items in nv_dict are missing LVs, let's arrange the data
3769 for key, inst in nv_dict.iteritems():
3770 res_missing.setdefault(inst, []).append(list(key))
3772 return (res_nodes, list(res_instances), res_missing)
3775 class LUClusterRepairDiskSizes(NoHooksLU):
3776 """Verifies the cluster disks sizes.
3781 def ExpandNames(self):
3782 if self.op.instances:
3783 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3784 self.needed_locks = {
3785 locking.LEVEL_NODE_RES: [],
3786 locking.LEVEL_INSTANCE: self.wanted_names,
3788 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3790 self.wanted_names = None
3791 self.needed_locks = {
3792 locking.LEVEL_NODE_RES: locking.ALL_SET,
3793 locking.LEVEL_INSTANCE: locking.ALL_SET,
3795 self.share_locks = {
3796 locking.LEVEL_NODE_RES: 1,
3797 locking.LEVEL_INSTANCE: 0,
3800 def DeclareLocks(self, level):
3801 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3802 self._LockInstancesNodes(primary_only=True, level=level)
3804 def CheckPrereq(self):
3805 """Check prerequisites.
3807 This only checks the optional instance list against the existing names.
3810 if self.wanted_names is None:
3811 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3813 self.wanted_instances = \
3814 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3816 def _EnsureChildSizes(self, disk):
3817 """Ensure children of the disk have the needed disk size.
3819 This is valid mainly for DRBD8 and fixes an issue where the
3820 children have a smaller disk size than the parent.
3822 @param disk: an L{ganeti.objects.Disk} object
3825 if disk.dev_type == constants.LD_DRBD8:
3826 assert disk.children, "Empty children for DRBD8?"
3827 fchild = disk.children[0]
3828 mismatch = fchild.size < disk.size
3830 self.LogInfo("Child disk has size %d, parent %d, fixing",
3831 fchild.size, disk.size)
3832 fchild.size = disk.size
3834 # and we recurse on this child only, not on the metadev
3835 return self._EnsureChildSizes(fchild) or mismatch
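# A truthy return value signals that at least one child size was fixed up
# in the configuration object; Exec below then persists the instance via
# cfg.Update and records the change.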
3839 def Exec(self, feedback_fn):
3840 """Verify the size of cluster disks.
3843 # TODO: check child disks too
3844 # TODO: check differences in size between primary/secondary nodes
3846 for instance in self.wanted_instances:
3847 pnode = instance.primary_node
3848 if pnode not in per_node_disks:
3849 per_node_disks[pnode] = []
3850 for idx, disk in enumerate(instance.disks):
3851 per_node_disks[pnode].append((instance, idx, disk))
3853 assert not (frozenset(per_node_disks.keys()) -
3854 self.owned_locks(locking.LEVEL_NODE_RES)), \
3855 "Not owning correct locks"
3856 assert not self.owned_locks(locking.LEVEL_NODE)
3859 for node, dskl in per_node_disks.items():
3860 newl = [v[2].Copy() for v in dskl]
3862 self.cfg.SetDiskID(dsk, node)
3863 result = self.rpc.call_blockdev_getsize(node, newl)
3865 self.LogWarning("Failure in blockdev_getsize call to node"
3866 " %s, ignoring", node)
3868 if len(result.payload) != len(dskl):
3869 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3870 " result.payload=%s", node, len(dskl), result.payload)
3871 self.LogWarning("Invalid result from node %s, ignoring node results",
3874 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3876 self.LogWarning("Disk %d of instance %s did not return size"
3877 " information, ignoring", idx, instance.name)
3879 if not isinstance(size, (int, long)):
3880 self.LogWarning("Disk %d of instance %s did not return valid"
3881 " size information, ignoring", idx, instance.name)
3884 if size != disk.size:
3885 self.LogInfo("Disk %d of instance %s has mismatched size,"
3886 " correcting: recorded %d, actual %d", idx,
3887 instance.name, disk.size, size)
3889 self.cfg.Update(instance, feedback_fn)
3890 changed.append((instance.name, idx, size))
3891 if self._EnsureChildSizes(disk):
3892 self.cfg.Update(instance, feedback_fn)
3893 changed.append((instance.name, idx, disk.size))
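# Each entry collected in "changed" is an (instance_name, disk_index, size)
# tuple describing one corrected disk size.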
3897 class LUClusterRename(LogicalUnit):
3898 """Rename the cluster.
3901 HPATH = "cluster-rename"
3902 HTYPE = constants.HTYPE_CLUSTER
3904 def BuildHooksEnv(self):
3909 "OP_TARGET": self.cfg.GetClusterName(),
3910 "NEW_NAME": self.op.name,
3913 def BuildHooksNodes(self):
3914 """Build hooks nodes.
3917 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3919 def CheckPrereq(self):
3920 """Verify that the passed name is a valid one.
3923 hostname = netutils.GetHostname(name=self.op.name,
3924 family=self.cfg.GetPrimaryIPFamily())
3926 new_name = hostname.name
3927 self.ip = new_ip = hostname.ip
3928 old_name = self.cfg.GetClusterName()
3929 old_ip = self.cfg.GetMasterIP()
3930 if new_name == old_name and new_ip == old_ip:
3931 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3932 " cluster has changed",
3934 if new_ip != old_ip:
3935 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3936 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3937 " reachable on the network" %
3938 new_ip, errors.ECODE_NOTUNIQUE)
3940 self.op.name = new_name
3942 def Exec(self, feedback_fn):
3943 """Rename the cluster.
3946 clustername = self.op.name
3949 # shutdown the master IP
3950 master_params = self.cfg.GetMasterNetworkParameters()
3951 ems = self.cfg.GetUseExternalMipScript()
3952 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3954 result.Raise("Could not disable the master role")
3957 cluster = self.cfg.GetClusterInfo()
3958 cluster.cluster_name = clustername
3959 cluster.master_ip = new_ip
3960 self.cfg.Update(cluster, feedback_fn)
3962 # update the known hosts file
3963 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3964 node_list = self.cfg.GetOnlineNodeList()
3966 node_list.remove(master_params.name)
3969 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3971 master_params.ip = new_ip
3972 result = self.rpc.call_node_activate_master_ip(master_params.name,
3974 msg = result.fail_msg
3976 self.LogWarning("Could not re-enable the master role on"
3977 " the master, please restart manually: %s", msg)
3982 def _ValidateNetmask(cfg, netmask):
3983 """Checks if a netmask is valid.
3985 @type cfg: L{config.ConfigWriter}
3986 @param cfg: The cluster configuration
3988 @param netmask: the netmask to be verified
3989 @raise errors.OpPrereqError: if the validation fails
3992 ip_family = cfg.GetPrimaryIPFamily()
3994 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3995 except errors.ProgrammerError:
3996 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3997 ip_family, errors.ECODE_INVAL)
3998 if not ipcls.ValidateNetmask(netmask):
3999 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4000 (netmask), errors.ECODE_INVAL)
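# Example (assuming, as the error message suggests, that the netmask is a
# CIDR prefix length): 24 would be accepted for an IPv4 cluster, while a
# value such as 64 only makes sense for IPv6.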
4003 class LUClusterSetParams(LogicalUnit):
4004 """Change the parameters of the cluster.
4007 HPATH = "cluster-modify"
4008 HTYPE = constants.HTYPE_CLUSTER
4011 def CheckArguments(self):
4015 if self.op.uid_pool:
4016 uidpool.CheckUidPool(self.op.uid_pool)
4018 if self.op.add_uids:
4019 uidpool.CheckUidPool(self.op.add_uids)
4021 if self.op.remove_uids:
4022 uidpool.CheckUidPool(self.op.remove_uids)
4024 if self.op.master_netmask is not None:
4025 _ValidateNetmask(self.cfg, self.op.master_netmask)
4027 if self.op.diskparams:
4028 for dt_params in self.op.diskparams.values():
4029 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4031 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4032 except errors.OpPrereqError, err:
4033 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4036 def ExpandNames(self):
4037 # FIXME: in the future maybe other cluster params won't require checking on
4038 # all nodes to be modified.
4039 self.needed_locks = {
4040 locking.LEVEL_NODE: locking.ALL_SET,
4041 locking.LEVEL_INSTANCE: locking.ALL_SET,
4042 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4044 self.share_locks = {
4045 locking.LEVEL_NODE: 1,
4046 locking.LEVEL_INSTANCE: 1,
4047 locking.LEVEL_NODEGROUP: 1,
4050 def BuildHooksEnv(self):
4055 "OP_TARGET": self.cfg.GetClusterName(),
4056 "NEW_VG_NAME": self.op.vg_name,
4059 def BuildHooksNodes(self):
4060 """Build hooks nodes.
4063 mn = self.cfg.GetMasterNode()
4066 def CheckPrereq(self):
4067 """Check prerequisites.
4069 This checks whether the given parameters don't conflict and
4070 whether the given volume group is valid.
4073 if self.op.vg_name is not None and not self.op.vg_name:
4074 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4075 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4076 " instances exist", errors.ECODE_INVAL)
4078 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4079 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4080 raise errors.OpPrereqError("Cannot disable drbd helper while"
4081 " drbd-based instances exist",
4084 node_list = self.owned_locks(locking.LEVEL_NODE)
4086 # if vg_name not None, checks given volume group on all nodes
4088 vglist = self.rpc.call_vg_list(node_list)
4089 for node in node_list:
4090 msg = vglist[node].fail_msg
4092 # ignoring down node
4093 self.LogWarning("Error while gathering data on node %s"
4094 " (ignoring node): %s", node, msg)
4096 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4098 constants.MIN_VG_SIZE)
4100 raise errors.OpPrereqError("Error on node '%s': %s" %
4101 (node, vgstatus), errors.ECODE_ENVIRON)
4103 if self.op.drbd_helper:
4104 # checks given drbd helper on all nodes
4105 helpers = self.rpc.call_drbd_helper(node_list)
4106 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4108 self.LogInfo("Not checking drbd helper on offline node %s", node)
4110 msg = helpers[node].fail_msg
4112 raise errors.OpPrereqError("Error checking drbd helper on node"
4113 " '%s': %s" % (node, msg),
4114 errors.ECODE_ENVIRON)
4115 node_helper = helpers[node].payload
4116 if node_helper != self.op.drbd_helper:
4117 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4118 (node, node_helper), errors.ECODE_ENVIRON)
4120 self.cluster = cluster = self.cfg.GetClusterInfo()
4121 # validate params changes
4122 if self.op.beparams:
4123 objects.UpgradeBeParams(self.op.beparams)
4124 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4125 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4127 if self.op.ndparams:
4128 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4129 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4131 # TODO: we need a more general way to handle resetting
4132 # cluster-level parameters to default values
4133 if self.new_ndparams["oob_program"] == "":
4134 self.new_ndparams["oob_program"] = \
4135 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
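# In other words, submitting an empty value for oob_program resets it to
# the compiled-in default instead of storing the empty string.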
4137 if self.op.hv_state:
4138 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4139 self.cluster.hv_state_static)
4140 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4141 for hv, values in new_hv_state.items())
4143 if self.op.disk_state:
4144 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4145 self.cluster.disk_state_static)
4146 self.new_disk_state = \
4147 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4148 for name, values in svalues.items()))
4149 for storage, svalues in new_disk_state.items())
4152 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4155 all_instances = self.cfg.GetAllInstancesInfo().values()
4157 for group in self.cfg.GetAllNodeGroupsInfo().values():
4158 instances = frozenset([inst for inst in all_instances
4159 if compat.any(node in group.members
4160 for node in inst.all_nodes)])
4161 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4162 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4163 new = _ComputeNewInstanceViolations(ipol,
4164 new_ipolicy, instances)
4166 violations.update(new)
4169 self.LogWarning("After the ipolicy change the following instances"
4170 " violate them: %s",
4171 utils.CommaJoin(utils.NiceSort(violations)))
4173 if self.op.nicparams:
4174 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4175 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4176 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4179 # check all instances for consistency
4180 for instance in self.cfg.GetAllInstancesInfo().values():
4181 for nic_idx, nic in enumerate(instance.nics):
4182 params_copy = copy.deepcopy(nic.nicparams)
4183 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4185 # check parameter syntax
4187 objects.NIC.CheckParameterSyntax(params_filled)
4188 except errors.ConfigurationError, err:
4189 nic_errors.append("Instance %s, nic/%d: %s" %
4190 (instance.name, nic_idx, err))
4192 # if we're moving instances to routed, check that they have an ip
4193 target_mode = params_filled[constants.NIC_MODE]
4194 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4195 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4196 " address" % (instance.name, nic_idx))
4198 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4199 "\n".join(nic_errors), errors.ECODE_INVAL)
4201 # hypervisor list/parameters
4202 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4203 if self.op.hvparams:
4204 for hv_name, hv_dict in self.op.hvparams.items():
4205 if hv_name not in self.new_hvparams:
4206 self.new_hvparams[hv_name] = hv_dict
4208 self.new_hvparams[hv_name].update(hv_dict)
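# For example, if the cluster only has parameters for "xen-pvm" and the
# opcode passes {"kvm": {...}}, the kvm entry is taken over as-is, while
# parameters of an already-known hypervisor are merged key by key rather
# than replacing the whole dictionary.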
4210 # disk template parameters
4211 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4212 if self.op.diskparams:
4213 for dt_name, dt_params in self.op.diskparams.items():
4214 if dt_name not in self.new_diskparams:
4215 self.new_diskparams[dt_name] = dt_params
4217 self.new_diskparams[dt_name].update(dt_params)
4219 # os hypervisor parameters
4220 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4222 for os_name, hvs in self.op.os_hvp.items():
4223 if os_name not in self.new_os_hvp:
4224 self.new_os_hvp[os_name] = hvs
4226 for hv_name, hv_dict in hvs.items():
4227 if hv_name not in self.new_os_hvp[os_name]:
4228 self.new_os_hvp[os_name][hv_name] = hv_dict
4230 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4233 self.new_osp = objects.FillDict(cluster.osparams, {})
4234 if self.op.osparams:
4235 for os_name, osp in self.op.osparams.items():
4236 if os_name not in self.new_osp:
4237 self.new_osp[os_name] = {}
4239 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4242 if not self.new_osp[os_name]:
4243 # we removed all parameters
4244 del self.new_osp[os_name]
4246 # check the parameter validity (remote check)
4247 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4248 os_name, self.new_osp[os_name])
4250 # changes to the hypervisor list
4251 if self.op.enabled_hypervisors is not None:
4252 self.hv_list = self.op.enabled_hypervisors
4253 for hv in self.hv_list:
4254 # if the hypervisor doesn't already exist in the cluster
4255 # hvparams, we initialize it to empty, and then (in both
4256 # cases) we make sure to fill the defaults, as we might not
4257 # have a complete defaults list if the hypervisor wasn't
4259 if hv not in new_hvp:
4261 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4262 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4264 self.hv_list = cluster.enabled_hypervisors
4266 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4267 # either the enabled list has changed, or the parameters have, validate
4268 for hv_name, hv_params in self.new_hvparams.items():
4269 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4270 (self.op.enabled_hypervisors and
4271 hv_name in self.op.enabled_hypervisors)):
4272 # either this is a new hypervisor, or its parameters have changed
4273 hv_class = hypervisor.GetHypervisor(hv_name)
4274 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4275 hv_class.CheckParameterSyntax(hv_params)
4276 _CheckHVParams(self, node_list, hv_name, hv_params)
4279 # no need to check any newly-enabled hypervisors, since the
4280 # defaults have already been checked in the above code-block
4281 for os_name, os_hvp in self.new_os_hvp.items():
4282 for hv_name, hv_params in os_hvp.items():
4283 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4284 # we need to fill in the new os_hvp on top of the actual hv_p
4285 cluster_defaults = self.new_hvparams.get(hv_name, {})
4286 new_osp = objects.FillDict(cluster_defaults, hv_params)
4287 hv_class = hypervisor.GetHypervisor(hv_name)
4288 hv_class.CheckParameterSyntax(new_osp)
4289 _CheckHVParams(self, node_list, hv_name, new_osp)
4291 if self.op.default_iallocator:
4292 alloc_script = utils.FindFile(self.op.default_iallocator,
4293 constants.IALLOCATOR_SEARCH_PATH,
4295 if alloc_script is None:
4296 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4297 " specified" % self.op.default_iallocator,
4300 def Exec(self, feedback_fn):
4301 """Change the parameters of the cluster.
4304 if self.op.vg_name is not None:
4305 new_volume = self.op.vg_name
4308 if new_volume != self.cfg.GetVGName():
4309 self.cfg.SetVGName(new_volume)
4311 feedback_fn("Cluster LVM configuration already in desired"
4312 " state, not changing")
4313 if self.op.drbd_helper is not None:
4314 new_helper = self.op.drbd_helper
4317 if new_helper != self.cfg.GetDRBDHelper():
4318 self.cfg.SetDRBDHelper(new_helper)
4320 feedback_fn("Cluster DRBD helper already in desired state,"
4322 if self.op.hvparams:
4323 self.cluster.hvparams = self.new_hvparams
4325 self.cluster.os_hvp = self.new_os_hvp
4326 if self.op.enabled_hypervisors is not None:
4327 self.cluster.hvparams = self.new_hvparams
4328 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4329 if self.op.beparams:
4330 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4331 if self.op.nicparams:
4332 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4334 self.cluster.ipolicy = self.new_ipolicy
4335 if self.op.osparams:
4336 self.cluster.osparams = self.new_osp
4337 if self.op.ndparams:
4338 self.cluster.ndparams = self.new_ndparams
4339 if self.op.diskparams:
4340 self.cluster.diskparams = self.new_diskparams
4341 if self.op.hv_state:
4342 self.cluster.hv_state_static = self.new_hv_state
4343 if self.op.disk_state:
4344 self.cluster.disk_state_static = self.new_disk_state
4346 if self.op.candidate_pool_size is not None:
4347 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4348 # we need to update the pool size here, otherwise the save will fail
4349 _AdjustCandidatePool(self, [])
4351 if self.op.maintain_node_health is not None:
4352 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4353 feedback_fn("Note: CONFD was disabled at build time, node health"
4354 " maintenance is not useful (still enabling it)")
4355 self.cluster.maintain_node_health = self.op.maintain_node_health
4357 if self.op.prealloc_wipe_disks is not None:
4358 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4360 if self.op.add_uids is not None:
4361 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4363 if self.op.remove_uids is not None:
4364 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4366 if self.op.uid_pool is not None:
4367 self.cluster.uid_pool = self.op.uid_pool
4369 if self.op.default_iallocator is not None:
4370 self.cluster.default_iallocator = self.op.default_iallocator
4372 if self.op.reserved_lvs is not None:
4373 self.cluster.reserved_lvs = self.op.reserved_lvs
4375 if self.op.use_external_mip_script is not None:
4376 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4378 def helper_os(aname, mods, desc):
4380 lst = getattr(self.cluster, aname)
4381 for key, val in mods:
4382 if key == constants.DDM_ADD:
4384 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4387 elif key == constants.DDM_REMOVE:
4391 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4393 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4395 if self.op.hidden_os:
4396 helper_os("hidden_os", self.op.hidden_os, "hidden")
4398 if self.op.blacklisted_os:
4399 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4401 if self.op.master_netdev:
4402 master_params = self.cfg.GetMasterNetworkParameters()
4403 ems = self.cfg.GetUseExternalMipScript()
4404 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4405 self.cluster.master_netdev)
4406 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4408 result.Raise("Could not disable the master ip")
4409 feedback_fn("Changing master_netdev from %s to %s" %
4410 (master_params.netdev, self.op.master_netdev))
4411 self.cluster.master_netdev = self.op.master_netdev
4413 if self.op.master_netmask:
4414 master_params = self.cfg.GetMasterNetworkParameters()
4415 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4416 result = self.rpc.call_node_change_master_netmask(master_params.name,
4417 master_params.netmask,
4418 self.op.master_netmask,
4420 master_params.netdev)
4422 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4425 self.cluster.master_netmask = self.op.master_netmask
4427 self.cfg.Update(self.cluster, feedback_fn)
4429 if self.op.master_netdev:
4430 master_params = self.cfg.GetMasterNetworkParameters()
4431 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4432 self.op.master_netdev)
4433 ems = self.cfg.GetUseExternalMipScript()
4434 result = self.rpc.call_node_activate_master_ip(master_params.name,
4437 self.LogWarning("Could not re-enable the master ip on"
4438 " the master, please restart manually: %s",
4442 def _UploadHelper(lu, nodes, fname):
4443 """Helper for uploading a file and showing warnings.
4446 if os.path.exists(fname):
4447 result = lu.rpc.call_upload_file(nodes, fname)
4448 for to_node, to_result in result.items():
4449 msg = to_result.fail_msg
4451 msg = ("Copy of file %s to node %s failed: %s" %
4452 (fname, to_node, msg))
4456 def _ComputeAncillaryFiles(cluster, redist):
4457 """Compute files external to Ganeti which need to be consistent.
4459 @type redist: boolean
4460 @param redist: Whether to include files which need to be redistributed
4463 # Compute files for all nodes
4465 pathutils.SSH_KNOWN_HOSTS_FILE,
4466 pathutils.CONFD_HMAC_KEY,
4467 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4468 pathutils.SPICE_CERT_FILE,
4469 pathutils.SPICE_CACERT_FILE,
4470 pathutils.RAPI_USERS_FILE,
4474 # we need to ship at least the RAPI certificate
4475 files_all.add(pathutils.RAPI_CERT_FILE)
4477 files_all.update(pathutils.ALL_CERT_FILES)
4478 files_all.update(ssconf.SimpleStore().GetFileList())
4480 if cluster.modify_etc_hosts:
4481 files_all.add(pathutils.ETC_HOSTS)
4483 if cluster.use_external_mip_script:
4484 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4486 # Files which are optional; these must:
4487 # - be present in one other category as well
4488 # - either exist or not exist on all nodes of that category (mc, vm all)
4490 pathutils.RAPI_USERS_FILE,
4493 # Files which should only be on master candidates
4497 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4501 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4502 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4503 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4505 # Files which should only be on VM-capable nodes
4508 for hv_name in cluster.enabled_hypervisors
4509 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4513 for hv_name in cluster.enabled_hypervisors
4514 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4516 # Filenames in each category must be unique
4517 all_files_set = files_all | files_mc | files_vm
4518 assert (len(all_files_set) ==
4519 sum(map(len, [files_all, files_mc, files_vm]))), \
4520 "Found file listed in more than one file list"
4522 # Optional files must be present in one other category
4523 assert all_files_set.issuperset(files_opt), \
4524 "Optional file not in a different required list"
4526 # This one file should never ever be re-distributed via RPC
4527 assert not (redist and
4528 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4530 return (files_all, files_opt, files_mc, files_vm)
4533 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4534 """Distribute additional files which are part of the cluster configuration.
4536 ConfigWriter takes care of distributing the config and ssconf files, but
4537 there are more files which should be distributed to all nodes. This function
4538 makes sure those are copied.
4540 @param lu: calling logical unit
4541 @param additional_nodes: list of nodes not in the config to distribute to
4542 @type additional_vm: boolean
4543 @param additional_vm: whether the additional nodes are vm-capable or not
4546 # Gather target nodes
4547 cluster = lu.cfg.GetClusterInfo()
4548 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4550 online_nodes = lu.cfg.GetOnlineNodeList()
4551 online_set = frozenset(online_nodes)
4552 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4554 if additional_nodes is not None:
4555 online_nodes.extend(additional_nodes)
4557 vm_nodes.extend(additional_nodes)
4559 # Never distribute to master node
4560 for nodelist in [online_nodes, vm_nodes]:
4561 if master_info.name in nodelist:
4562 nodelist.remove(master_info.name)
4565 (files_all, _, files_mc, files_vm) = \
4566 _ComputeAncillaryFiles(cluster, True)
4568 # Never re-distribute configuration file from here
4569 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4570 pathutils.CLUSTER_CONF_FILE in files_vm)
4571 assert not files_mc, "Master candidates not handled in this function"
4574 (online_nodes, files_all),
4575 (vm_nodes, files_vm),
4579 for (node_list, files) in filemap:
4581 _UploadHelper(lu, node_list, fname)
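# Within this module the helper is used, for instance, by
# LUClusterRedistConf.Exec and LUNodeRemove.Exec to push the ancillary
# files back out after configuration or membership changes.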
4584 class LUClusterRedistConf(NoHooksLU):
4585 """Force the redistribution of cluster configuration.
4587 This is a very simple LU.
4592 def ExpandNames(self):
4593 self.needed_locks = {
4594 locking.LEVEL_NODE: locking.ALL_SET,
4596 self.share_locks[locking.LEVEL_NODE] = 1
4598 def Exec(self, feedback_fn):
4599 """Redistribute the configuration.
4602 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4603 _RedistributeAncillaryFiles(self)
4606 class LUClusterActivateMasterIp(NoHooksLU):
4607 """Activate the master IP on the master node.
4610 def Exec(self, feedback_fn):
4611 """Activate the master IP.
4614 master_params = self.cfg.GetMasterNetworkParameters()
4615 ems = self.cfg.GetUseExternalMipScript()
4616 result = self.rpc.call_node_activate_master_ip(master_params.name,
4618 result.Raise("Could not activate the master IP")
4621 class LUClusterDeactivateMasterIp(NoHooksLU):
4622 """Deactivate the master IP on the master node.
4625 def Exec(self, feedback_fn):
4626 """Deactivate the master IP.
4629 master_params = self.cfg.GetMasterNetworkParameters()
4630 ems = self.cfg.GetUseExternalMipScript()
4631 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4633 result.Raise("Could not deactivate the master IP")
4636 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4637 """Sleep and poll for an instance's disk to sync.
4640 if not instance.disks or disks is not None and not disks:
4643 disks = _ExpandCheckDisks(instance, disks)
4646 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4648 node = instance.primary_node
4651 lu.cfg.SetDiskID(dev, node)
4653 # TODO: Convert to utils.Retry
4656 degr_retries = 10 # in seconds, as we sleep 1 second each time
4660 cumul_degraded = False
4661 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4662 msg = rstats.fail_msg
4664 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4667 raise errors.RemoteError("Can't contact node %s for mirror data,"
4668 " aborting." % node)
4671 rstats = rstats.payload
4673 for i, mstat in enumerate(rstats):
4675 lu.LogWarning("Can't compute data for node %s/%s",
4676 node, disks[i].iv_name)
4679 cumul_degraded = (cumul_degraded or
4680 (mstat.is_degraded and mstat.sync_percent is None))
4681 if mstat.sync_percent is not None:
4683 if mstat.estimated_time is not None:
4684 rem_time = ("%s remaining (estimated)" %
4685 utils.FormatSeconds(mstat.estimated_time))
4686 max_time = mstat.estimated_time
4688 rem_time = "no time estimate"
4689 lu.LogInfo("- device %s: %5.2f%% done, %s",
4690 disks[i].iv_name, mstat.sync_percent, rem_time)
4692 # if we're done but degraded, let's do a few small retries, to
4693 # make sure we see a stable and not transient situation; therefore
4694 # we force restart of the loop
4695 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4696 logging.info("Degraded disks found, %d retries left", degr_retries)
4704 time.sleep(min(60, max_time))
4707 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4709 return not cumul_degraded
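# The boolean result is True only if no disk was still degraded when the
# wait loop finished, so callers can tell whether the instance's storage
# actually reached a clean state.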
4712 def _BlockdevFind(lu, node, dev, instance):
4713 """Wrapper around call_blockdev_find to annotate diskparams.
4715 @param lu: A reference to the lu object
4716 @param node: The node to call out to
4717 @param dev: The device to find
4718 @param instance: The instance object the device belongs to
4719 @return: The result of the rpc call
4722 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4723 return lu.rpc.call_blockdev_find(node, disk)
4726 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4727 """Wrapper around L{_CheckDiskConsistencyInner}.
4730 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4731 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4735 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4737 """Check that mirrors are not degraded.
4739 @attention: The device has to be annotated already.
4741 The ldisk parameter, if True, will change the test from the
4742 is_degraded attribute (which represents overall non-ok status for
4743 the device(s)) to the ldisk (representing the local storage status).
4746 lu.cfg.SetDiskID(dev, node)
4750 if on_primary or dev.AssembleOnSecondary():
4751 rstats = lu.rpc.call_blockdev_find(node, dev)
4752 msg = rstats.fail_msg
4754 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4756 elif not rstats.payload:
4757 lu.LogWarning("Can't find disk on node %s", node)
4761 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4763 result = result and not rstats.payload.is_degraded
4766 for child in dev.children:
4767 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4773 class LUOobCommand(NoHooksLU):
4774 """Logical unit for OOB handling.
4778 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4780 def ExpandNames(self):
4781 """Gather locks we need.
4784 if self.op.node_names:
4785 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4786 lock_names = self.op.node_names
4788 lock_names = locking.ALL_SET
4790 self.needed_locks = {
4791 locking.LEVEL_NODE: lock_names,
4794 def CheckPrereq(self):
4795 """Check prerequisites.
4798 - the node exists in the configuration
4801 Any errors are signaled by raising errors.OpPrereqError.
4805 self.master_node = self.cfg.GetMasterNode()
4807 assert self.op.power_delay >= 0.0
4809 if self.op.node_names:
4810 if (self.op.command in self._SKIP_MASTER and
4811 self.master_node in self.op.node_names):
4812 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4813 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4815 if master_oob_handler:
4816 additional_text = ("run '%s %s %s' if you want to operate on the"
4817 " master regardless") % (master_oob_handler,
4821 additional_text = "it does not support out-of-band operations"
4823 raise errors.OpPrereqError(("Operating on the master node %s is not"
4824 " allowed for %s; %s") %
4825 (self.master_node, self.op.command,
4826 additional_text), errors.ECODE_INVAL)
4828 self.op.node_names = self.cfg.GetNodeList()
4829 if self.op.command in self._SKIP_MASTER:
4830 self.op.node_names.remove(self.master_node)
4832 if self.op.command in self._SKIP_MASTER:
4833 assert self.master_node not in self.op.node_names
4835 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4837 raise errors.OpPrereqError("Node %s not found" % node_name,
4840 self.nodes.append(node)
4842 if (not self.op.ignore_status and
4843 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4844 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4845 " not marked offline") % node_name,
4848 def Exec(self, feedback_fn):
4849 """Execute OOB and return result if we expect any.
4852 master_node = self.master_node
4855 for idx, node in enumerate(utils.NiceSort(self.nodes,
4856 key=lambda node: node.name)):
4857 node_entry = [(constants.RS_NORMAL, node.name)]
4858 ret.append(node_entry)
4860 oob_program = _SupportsOob(self.cfg, node)
4863 node_entry.append((constants.RS_UNAVAIL, None))
4866 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4867 self.op.command, oob_program, node.name)
4868 result = self.rpc.call_run_oob(master_node, oob_program,
4869 self.op.command, node.name,
4873 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4874 node.name, result.fail_msg)
4875 node_entry.append((constants.RS_NODATA, None))
4878 self._CheckPayload(result)
4879 except errors.OpExecError, err:
4880 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4882 node_entry.append((constants.RS_NODATA, None))
4884 if self.op.command == constants.OOB_HEALTH:
4885 # For health we should log important events
4886 for item, status in result.payload:
4887 if status in [constants.OOB_STATUS_WARNING,
4888 constants.OOB_STATUS_CRITICAL]:
4889 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4890 item, node.name, status)
4892 if self.op.command == constants.OOB_POWER_ON:
4894 elif self.op.command == constants.OOB_POWER_OFF:
4895 node.powered = False
4896 elif self.op.command == constants.OOB_POWER_STATUS:
4897 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4898 if powered != node.powered:
4899 logging.warning(("Recorded power state (%s) of node '%s' does not"
4900 " match actual power state (%s)"), node.powered,
4903 # For configuration changing commands we should update the node
4904 if self.op.command in (constants.OOB_POWER_ON,
4905 constants.OOB_POWER_OFF):
4906 self.cfg.Update(node, feedback_fn)
4908 node_entry.append((constants.RS_NORMAL, result.payload))
4910 if (self.op.command == constants.OOB_POWER_ON and
4911 idx < len(self.nodes) - 1):
4912 time.sleep(self.op.power_delay)
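# Power-on commands are deliberately staggered: after every node except the
# last one we sleep for the opcode's power_delay, presumably so that all
# machines are not powered up at the very same moment.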
4916 def _CheckPayload(self, result):
4917 """Checks if the payload is valid.
4919 @param result: RPC result
4920 @raises errors.OpExecError: If payload is not valid
4924 if self.op.command == constants.OOB_HEALTH:
4925 if not isinstance(result.payload, list):
4926 errs.append("command 'health' is expected to return a list but got %s" %
4927 type(result.payload))
4929 for item, status in result.payload:
4930 if status not in constants.OOB_STATUSES:
4931 errs.append("health item '%s' has invalid status '%s'" %
4934 if self.op.command == constants.OOB_POWER_STATUS:
4935 if not isinstance(result.payload, dict):
4936 errs.append("power-status is expected to return a dict but got %s" %
4937 type(result.payload))
4939 if self.op.command in [
4940 constants.OOB_POWER_ON,
4941 constants.OOB_POWER_OFF,
4942 constants.OOB_POWER_CYCLE,
4944 if result.payload is not None:
4945 errs.append("%s is expected to not return payload but got '%s'" %
4946 (self.op.command, result.payload))
4949 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4950 utils.CommaJoin(errs))
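# To summarize the checks above: OOB_HEALTH must return a list of
# (item, status) pairs with known statuses, OOB_POWER_STATUS a dictionary
# (Exec reads its OOB_POWER_STATUS_POWERED entry), and the power
# on/off/cycle commands must not return any payload.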
4953 class _OsQuery(_QueryBase):
4954 FIELDS = query.OS_FIELDS
4956 def ExpandNames(self, lu):
4957 # Lock all nodes in shared mode
4958 # Temporary removal of locks, should be reverted later
4959 # TODO: reintroduce locks when they are lighter-weight
4960 lu.needed_locks = {}
4961 #self.share_locks[locking.LEVEL_NODE] = 1
4962 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4964 # The following variables interact with _QueryBase._GetNames
4966 self.wanted = self.names
4968 self.wanted = locking.ALL_SET
4970 self.do_locking = self.use_locking
4972 def DeclareLocks(self, lu, level):
4976 def _DiagnoseByOS(rlist):
4977 """Remaps a per-node return list into an a per-os per-node dictionary
4979 @param rlist: a map with node names as keys and OS objects as values
4982 @return: a dictionary with osnames as keys and as value another
4983 map, with nodes as keys and tuples of (path, status, diagnose,
4984 variants, parameters, api_versions) as values, e.g.::
4986 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4987 (/srv/..., False, "invalid api")],
4988 "node2": [(/srv/..., True, "", [], [])]}
4993 # we build here the list of nodes that didn't fail the RPC (at RPC
4994 # level), so that nodes with a non-responding node daemon don't
4995 # make all OSes invalid
4996 good_nodes = [node_name for node_name in rlist
4997 if not rlist[node_name].fail_msg]
4998 for node_name, nr in rlist.items():
4999 if nr.fail_msg or not nr.payload:
5001 for (name, path, status, diagnose, variants,
5002 params, api_versions) in nr.payload:
5003 if name not in all_os:
5004 # build a list of nodes for this os containing empty lists
5005 # for each node in node_list
5007 for nname in good_nodes:
5008 all_os[name][nname] = []
5009 # convert params from [name, help] to (name, help)
5010 params = [tuple(v) for v in params]
5011 all_os[name][node_name].append((path, status, diagnose,
5012 variants, params, api_versions))
5015 def _GetQueryData(self, lu):
5016 """Computes the list of nodes and their attributes.
5019 # Locking is not used
5020 assert not (compat.any(lu.glm.is_owned(level)
5021 for level in locking.LEVELS
5022 if level != locking.LEVEL_CLUSTER) or
5023 self.do_locking or self.use_locking)
5025 valid_nodes = [node.name
5026 for node in lu.cfg.GetAllNodesInfo().values()
5027 if not node.offline and node.vm_capable]
5028 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5029 cluster = lu.cfg.GetClusterInfo()
5033 for (os_name, os_data) in pol.items():
5034 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5035 hidden=(os_name in cluster.hidden_os),
5036 blacklisted=(os_name in cluster.blacklisted_os))
5040 api_versions = set()
5042 for idx, osl in enumerate(os_data.values()):
5043 info.valid = bool(info.valid and osl and osl[0][1])
5047 (node_variants, node_params, node_api) = osl[0][3:6]
5050 variants.update(node_variants)
5051 parameters.update(node_params)
5052 api_versions.update(node_api)
5054 # Filter out inconsistent values
5055 variants.intersection_update(node_variants)
5056 parameters.intersection_update(node_params)
5057 api_versions.intersection_update(node_api)
5059 info.variants = list(variants)
5060 info.parameters = list(parameters)
5061 info.api_versions = list(api_versions)
5063 data[os_name] = info
5065 # Prepare data in requested order
5066 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5070 class LUOsDiagnose(NoHooksLU):
5071 """Logical unit for OS diagnose/query.
5077 def _BuildFilter(fields, names):
5078 """Builds a filter for querying OSes.
5081 name_filter = qlang.MakeSimpleFilter("name", names)
5083 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5084 # respective field is not requested
5085 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5086 for fname in ["hidden", "blacklisted"]
5087 if fname not in fields]
5088 if "valid" not in fields:
5089 status_filter.append([qlang.OP_TRUE, "valid"])
5092 status_filter.insert(0, qlang.OP_AND)
5094 status_filter = None
5096 if name_filter and status_filter:
5097 return [qlang.OP_AND, name_filter, status_filter]
5101 return status_filter
5103 def CheckArguments(self):
5104 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5105 self.op.output_fields, False)
5107 def ExpandNames(self):
5108 self.oq.ExpandNames(self)
5110 def Exec(self, feedback_fn):
5111 return self.oq.OldStyleQuery(self)
5114 class LUNodeRemove(LogicalUnit):
5115 """Logical unit for removing a node.
5118 HPATH = "node-remove"
5119 HTYPE = constants.HTYPE_NODE
5121 def BuildHooksEnv(self):
5126 "OP_TARGET": self.op.node_name,
5127 "NODE_NAME": self.op.node_name,
5130 def BuildHooksNodes(self):
5131 """Build hooks nodes.
5133 This doesn't run on the target node in the pre phase as a failed
5134 node would then be impossible to remove.
5137 all_nodes = self.cfg.GetNodeList()
5139 all_nodes.remove(self.op.node_name)
5142 return (all_nodes, all_nodes)
5144 def CheckPrereq(self):
5145 """Check prerequisites.
5148 - the node exists in the configuration
5149 - it does not have primary or secondary instances
5150 - it's not the master
5152 Any errors are signaled by raising errors.OpPrereqError.
5155 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5156 node = self.cfg.GetNodeInfo(self.op.node_name)
5157 assert node is not None
5159 masternode = self.cfg.GetMasterNode()
5160 if node.name == masternode:
5161 raise errors.OpPrereqError("Node is the master node, failover to another"
5162 " node is required", errors.ECODE_INVAL)
5164 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5165 if node.name in instance.all_nodes:
5166 raise errors.OpPrereqError("Instance %s is still running on the node,"
5167 " please remove first" % instance_name,
5169 self.op.node_name = node.name
5172 def Exec(self, feedback_fn):
5173 """Removes the node from the cluster.
5177 logging.info("Stopping the node daemon and removing configs from node %s",
5180 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5182 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5185 # Promote nodes to master candidate as needed
5186 _AdjustCandidatePool(self, exceptions=[node.name])
5187 self.context.RemoveNode(node.name)
5189 # Run post hooks on the node before it's removed
5190 _RunPostHook(self, node.name)
5192 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5193 msg = result.fail_msg
5195 self.LogWarning("Errors encountered on the remote node while leaving"
5196 " the cluster: %s", msg)
5198 # Remove node from our /etc/hosts
5199 if self.cfg.GetClusterInfo().modify_etc_hosts:
5200 master_node = self.cfg.GetMasterNode()
5201 result = self.rpc.call_etc_hosts_modify(master_node,
5202 constants.ETC_HOSTS_REMOVE,
5204 result.Raise("Can't update hosts file with new host data")
5205 _RedistributeAncillaryFiles(self)
5208 class _NodeQuery(_QueryBase):
5209 FIELDS = query.NODE_FIELDS
5211 def ExpandNames(self, lu):
5212 lu.needed_locks = {}
5213 lu.share_locks = _ShareAll()
5216 self.wanted = _GetWantedNodes(lu, self.names)
5218 self.wanted = locking.ALL_SET
5220 self.do_locking = (self.use_locking and
5221 query.NQ_LIVE in self.requested_data)
5224 # If any non-static field is requested we need to lock the nodes
5225 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5227 def DeclareLocks(self, lu, level):
5230 def _GetQueryData(self, lu):
5231 """Computes the list of nodes and their attributes.
5234 all_info = lu.cfg.GetAllNodesInfo()
5236 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5238 # Gather data as requested
5239 if query.NQ_LIVE in self.requested_data:
5240 # filter out non-vm_capable nodes
5241 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5243 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5244 [lu.cfg.GetHypervisorType()])
5245 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5246 for (name, nresult) in node_data.items()
5247 if not nresult.fail_msg and nresult.payload)
5251 if query.NQ_INST in self.requested_data:
5252 node_to_primary = dict([(name, set()) for name in nodenames])
5253 node_to_secondary = dict([(name, set()) for name in nodenames])
5255 inst_data = lu.cfg.GetAllInstancesInfo()
5257 for inst in inst_data.values():
5258 if inst.primary_node in node_to_primary:
5259 node_to_primary[inst.primary_node].add(inst.name)
5260 for secnode in inst.secondary_nodes:
5261 if secnode in node_to_secondary:
5262 node_to_secondary[secnode].add(inst.name)
5264 node_to_primary = None
5265 node_to_secondary = None
5267 if query.NQ_OOB in self.requested_data:
5268 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5269 for name, node in all_info.iteritems())
5273 if query.NQ_GROUP in self.requested_data:
5274 groups = lu.cfg.GetAllNodeGroupsInfo()
5278 return query.NodeQueryData([all_info[name] for name in nodenames],
5279 live_data, lu.cfg.GetMasterNode(),
5280 node_to_primary, node_to_secondary, groups,
5281 oob_support, lu.cfg.GetClusterInfo())
5284 class LUNodeQuery(NoHooksLU):
5285 """Logical unit for querying nodes.
5288 # pylint: disable=W0142
5291 def CheckArguments(self):
5292 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5293 self.op.output_fields, self.op.use_locking)
5295 def ExpandNames(self):
5296 self.nq.ExpandNames(self)
5298 def DeclareLocks(self, level):
5299 self.nq.DeclareLocks(self, level)
5301 def Exec(self, feedback_fn):
5302 return self.nq.OldStyleQuery(self)
5305 class LUNodeQueryvols(NoHooksLU):
5306 """Logical unit for getting volumes on node(s).
5310 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5311 _FIELDS_STATIC = utils.FieldSet("node")
5313 def CheckArguments(self):
5314 _CheckOutputFields(static=self._FIELDS_STATIC,
5315 dynamic=self._FIELDS_DYNAMIC,
5316 selected=self.op.output_fields)
5318 def ExpandNames(self):
5319 self.share_locks = _ShareAll()
5320 self.needed_locks = {}
5322 if not self.op.nodes:
5323 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5325 self.needed_locks[locking.LEVEL_NODE] = \
5326 _GetWantedNodes(self, self.op.nodes)
5328 def Exec(self, feedback_fn):
5329 """Computes the list of nodes and their attributes.
5332 nodenames = self.owned_locks(locking.LEVEL_NODE)
5333 volumes = self.rpc.call_node_volumes(nodenames)
5335 ilist = self.cfg.GetAllInstancesInfo()
5336 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5339 for node in nodenames:
5340 nresult = volumes[node]
5343 msg = nresult.fail_msg
5345 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5348 node_vols = sorted(nresult.payload,
5349 key=operator.itemgetter("dev"))
5351 for vol in node_vols:
5353 for field in self.op.output_fields:
5356 elif field == "phys":
5360 elif field == "name":
5362 elif field == "size":
5363 val = int(float(vol["size"]))
5364 elif field == "instance":
5365 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5367 raise errors.ParameterError(field)
5368 node_output.append(str(val))
5370 output.append(node_output)
5375 class LUNodeQueryStorage(NoHooksLU):
5376 """Logical unit for getting information on storage units on node(s).
5379 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5382 def CheckArguments(self):
5383 _CheckOutputFields(static=self._FIELDS_STATIC,
5384 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5385 selected=self.op.output_fields)
5387 def ExpandNames(self):
5388 self.share_locks = _ShareAll()
5391 self.needed_locks = {
5392 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5395 self.needed_locks = {
5396 locking.LEVEL_NODE: locking.ALL_SET,
5397 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5400 def Exec(self, feedback_fn):
5401 """Computes the list of nodes and their attributes.
5404 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5406 # Always get name to sort by
5407 if constants.SF_NAME in self.op.output_fields:
5408 fields = self.op.output_fields[:]
5410 fields = [constants.SF_NAME] + self.op.output_fields
5412 # Never ask for node or type as it's only known to the LU
5413 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5414 while extra in fields:
5415 fields.remove(extra)
5417 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5418 name_idx = field_idx[constants.SF_NAME]
5420 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5421 data = self.rpc.call_storage_list(self.nodes,
5422 self.op.storage_type, st_args,
5423 self.op.name, fields)
5427 for node in utils.NiceSort(self.nodes):
5428 nresult = data[node]
5432 msg = nresult.fail_msg
5434 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5437 rows = dict([(row[name_idx], row) for row in nresult.payload])
5439 for name in utils.NiceSort(rows.keys()):
5444 for field in self.op.output_fields:
5445 if field == constants.SF_NODE:
5447 elif field == constants.SF_TYPE:
5448 val = self.op.storage_type
5449 elif field in field_idx:
5450 val = row[field_idx[field]]
5452 raise errors.ParameterError(field)
5461 class _InstanceQuery(_QueryBase):
5462 FIELDS = query.INSTANCE_FIELDS
5464 def ExpandNames(self, lu):
5465 lu.needed_locks = {}
5466 lu.share_locks = _ShareAll()
5469 self.wanted = _GetWantedInstances(lu, self.names)
5471 self.wanted = locking.ALL_SET
5473 self.do_locking = (self.use_locking and
5474 query.IQ_LIVE in self.requested_data)
5476 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5477 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5478 lu.needed_locks[locking.LEVEL_NODE] = []
5479 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5481 self.do_grouplocks = (self.do_locking and
5482 query.IQ_NODES in self.requested_data)
5484 def DeclareLocks(self, lu, level):
5486 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5487 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5489 # Lock all groups used by instances optimistically; this requires going
5490 # via the node before it's locked, requiring verification later on
5491 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5493 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5494 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5495 elif level == locking.LEVEL_NODE:
5496 lu._LockInstancesNodes() # pylint: disable=W0212
5499 def _CheckGroupLocks(lu):
5500 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5501 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5503 # Check if node groups for locked instances are still correct
5504 for instance_name in owned_instances:
5505 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5507 def _GetQueryData(self, lu):
5508 """Computes the list of instances and their attributes.
5511 if self.do_grouplocks:
5512 self._CheckGroupLocks(lu)
5514 cluster = lu.cfg.GetClusterInfo()
5515 all_info = lu.cfg.GetAllInstancesInfo()
5517 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5519 instance_list = [all_info[name] for name in instance_names]
5520 nodes = frozenset(itertools.chain(*(inst.all_nodes
5521 for inst in instance_list)))
5522 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5525 wrongnode_inst = set()
5527 # Gather data as requested
5528 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5530 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5532 result = node_data[name]
5534 # offline nodes will be in both lists
5535 assert result.fail_msg
5536 offline_nodes.append(name)
5538 bad_nodes.append(name)
5539 elif result.payload:
5540 for inst in result.payload:
5541 if inst in all_info:
5542 if all_info[inst].primary_node == name:
5543 live_data.update(result.payload)
5545 wrongnode_inst.add(inst)
5547 # orphan instance; we don't list it here as we don't
5548 # handle this case yet in the output of instance listing
5549 logging.warning("Orphan instance '%s' found on node %s",
5551 # else no instance is alive
5555 if query.IQ_DISKUSAGE in self.requested_data:
5556 gmi = ganeti.masterd.instance
5557 disk_usage = dict((inst.name,
5558 gmi.ComputeDiskSize(inst.disk_template,
5559 [{constants.IDISK_SIZE: disk.size}
5560 for disk in inst.disks]))
5561 for inst in instance_list)
5565 if query.IQ_CONSOLE in self.requested_data:
5567 for inst in instance_list:
5568 if inst.name in live_data:
5569 # Instance is running
5570 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5572 consinfo[inst.name] = None
5573 assert set(consinfo.keys()) == set(instance_names)
5577 if query.IQ_NODES in self.requested_data:
5578 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5580 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5581 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5582 for uuid in set(map(operator.attrgetter("group"),
5588 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5589 disk_usage, offline_nodes, bad_nodes,
5590 live_data, wrongnode_inst, consinfo,
5594 class LUQuery(NoHooksLU):
5595 """Query for resources/items of a certain kind.
5598 # pylint: disable=W0142
5601 def CheckArguments(self):
5602 qcls = _GetQueryImplementation(self.op.what)
5604 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5606 def ExpandNames(self):
5607 self.impl.ExpandNames(self)
5609 def DeclareLocks(self, level):
5610 self.impl.DeclareLocks(self, level)
5612 def Exec(self, feedback_fn):
5613 return self.impl.NewStyleQuery(self)
5616 class LUQueryFields(NoHooksLU):
5617 """Query for resources/items of a certain kind.
5620 # pylint: disable=W0142
5623 def CheckArguments(self):
5624 self.qcls = _GetQueryImplementation(self.op.what)
5626 def ExpandNames(self):
5627 self.needed_locks = {}
5629 def Exec(self, feedback_fn):
5630 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5633 class LUNodeModifyStorage(NoHooksLU):
5634 """Logical unit for modifying a storage volume on a node.
5639 def CheckArguments(self):
5640 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5642 storage_type = self.op.storage_type
5645 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5647 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5648 " modified" % storage_type,
5651 diff = set(self.op.changes.keys()) - modifiable
5653 raise errors.OpPrereqError("The following fields can not be modified for"
5654 " storage units of type '%s': %r" %
5655 (storage_type, list(diff)),
5658 def ExpandNames(self):
5659 self.needed_locks = {
5660 locking.LEVEL_NODE: self.op.node_name,
5663 def Exec(self, feedback_fn):
5664 """Computes the list of nodes and their attributes.
5667 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5668 result = self.rpc.call_storage_modify(self.op.node_name,
5669 self.op.storage_type, st_args,
5670 self.op.name, self.op.changes)
5671 result.Raise("Failed to modify storage unit '%s' on %s" %
5672 (self.op.name, self.op.node_name))
5675 class LUNodeAdd(LogicalUnit):
5676 """Logical unit for adding node to the cluster.
5680 HTYPE = constants.HTYPE_NODE
5681 _NFLAGS = ["master_capable", "vm_capable"]
5683 def CheckArguments(self):
5684 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5685 # validate/normalize the node name
5686 self.hostname = netutils.GetHostname(name=self.op.node_name,
5687 family=self.primary_ip_family)
5688 self.op.node_name = self.hostname.name
5690 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5691 raise errors.OpPrereqError("Cannot readd the master node",
5694 if self.op.readd and self.op.group:
5695 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5696 " being readded", errors.ECODE_INVAL)
5698 def BuildHooksEnv(self):
5701 This will run on all nodes before, and on all nodes + the new node after.
5705 "OP_TARGET": self.op.node_name,
5706 "NODE_NAME": self.op.node_name,
5707 "NODE_PIP": self.op.primary_ip,
5708 "NODE_SIP": self.op.secondary_ip,
5709 "MASTER_CAPABLE": str(self.op.master_capable),
5710 "VM_CAPABLE": str(self.op.vm_capable),
5713 def BuildHooksNodes(self):
5714 """Build hooks nodes.
5717 # Exclude added node
5718 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5719 post_nodes = pre_nodes + [self.op.node_name, ]
5721 return (pre_nodes, post_nodes)
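# Illustrative note (hypothetical node names): when adding "node4" to a
# cluster made of node1..node3, the pre-hook node list built above contains
# only node1..node3, while the post-hook list additionally contains node4.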
5723 def CheckPrereq(self):
5724 """Check prerequisites.
5727 - the new node is not already in the config
5729 - its parameters (single/dual homed) match the cluster
5731 Any errors are signaled by raising errors.OpPrereqError.
5735 hostname = self.hostname
5736 node = hostname.name
5737 primary_ip = self.op.primary_ip = hostname.ip
5738 if self.op.secondary_ip is None:
5739 if self.primary_ip_family == netutils.IP6Address.family:
5740 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5741 " IPv4 address must be given as secondary",
5743 self.op.secondary_ip = primary_ip
5745 secondary_ip = self.op.secondary_ip
5746 if not netutils.IP4Address.IsValid(secondary_ip):
5747 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5748 " address" % secondary_ip, errors.ECODE_INVAL)
5750 node_list = cfg.GetNodeList()
5751 if not self.op.readd and node in node_list:
5752 raise errors.OpPrereqError("Node %s is already in the configuration" %
5753 node, errors.ECODE_EXISTS)
5754 elif self.op.readd and node not in node_list:
5755 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5758 self.changed_primary_ip = False
5760 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5761 if self.op.readd and node == existing_node_name:
5762 if existing_node.secondary_ip != secondary_ip:
5763 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5764 " address configuration as before",
5766 if existing_node.primary_ip != primary_ip:
5767 self.changed_primary_ip = True
5771 if (existing_node.primary_ip == primary_ip or
5772 existing_node.secondary_ip == primary_ip or
5773 existing_node.primary_ip == secondary_ip or
5774 existing_node.secondary_ip == secondary_ip):
5775 raise errors.OpPrereqError("New node ip address(es) conflict with"
5776 " existing node %s" % existing_node.name,
5777 errors.ECODE_NOTUNIQUE)
5779 # After this 'if' block, None is no longer a valid value for the
5780 # _capable op attributes
5781 if self.op.readd:
5782 old_node = self.cfg.GetNodeInfo(node)
5783 assert old_node is not None, "Can't retrieve locked node %s" % node
5784 for attr in self._NFLAGS:
5785 if getattr(self.op, attr) is None:
5786 setattr(self.op, attr, getattr(old_node, attr))
5787 else:
5788 for attr in self._NFLAGS:
5789 if getattr(self.op, attr) is None:
5790 setattr(self.op, attr, True)
5792 if self.op.readd and not self.op.vm_capable:
5793 pri, sec = cfg.GetNodeInstances(node)
5795 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5796 " flag set to false, but it already holds"
5797 " instances" % node,
5800 # check that the type of the node (single versus dual homed) is the
5801 # same as for the master
5802 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5803 master_singlehomed = myself.secondary_ip == myself.primary_ip
5804 newbie_singlehomed = secondary_ip == primary_ip
5805 if master_singlehomed != newbie_singlehomed:
5806 if master_singlehomed:
5807 raise errors.OpPrereqError("The master has no secondary ip but the"
5808 " new node has one",
5811 raise errors.OpPrereqError("The master has a secondary ip but the"
5812 " new node doesn't have one",
5815 # checks reachability
5816 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5817 raise errors.OpPrereqError("Node not reachable by ping",
5818 errors.ECODE_ENVIRON)
5820 if not newbie_singlehomed:
5821 # check reachability from my secondary ip to newbie's secondary ip
5822 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5823 source=myself.secondary_ip):
5824 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5825 " based ping to node daemon port",
5826 errors.ECODE_ENVIRON)
5833 if self.op.master_capable:
5834 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5835 else:
5836 self.master_candidate = False
5838 if self.op.readd:
5839 self.new_node = old_node
5840 else:
5841 node_group = cfg.LookupNodeGroup(self.op.group)
5842 self.new_node = objects.Node(name=node,
5843 primary_ip=primary_ip,
5844 secondary_ip=secondary_ip,
5845 master_candidate=self.master_candidate,
5846 offline=False, drained=False,
5849 if self.op.ndparams:
5850 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5852 if self.op.hv_state:
5853 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5855 if self.op.disk_state:
5856 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5858 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5859 # it a property on the base class.
5860 result = rpc.DnsOnlyRunner().call_version([node])[node]
5861 result.Raise("Can't get version information from node %s" % node)
5862 if constants.PROTOCOL_VERSION == result.payload:
5863 logging.info("Communication to node %s fine, sw version %s match",
5864 node, result.payload)
5866 raise errors.OpPrereqError("Version mismatch master version %s,"
5867 " node version %s" %
5868 (constants.PROTOCOL_VERSION, result.payload),
5869 errors.ECODE_ENVIRON)
5871 def Exec(self, feedback_fn):
5872 """Adds the new node to the cluster.
5875 new_node = self.new_node
5876 node = new_node.name
5878 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5881 # We are adding a new node, so we assume it's powered
5882 new_node.powered = True
5884 # for re-adds, reset the offline/drained/master-candidate flags;
5885 # we need to reset here, otherwise offline would prevent RPC calls
5886 # later in the procedure; this also means that if the re-add
5887 # fails, we are left with a non-offlined, broken node
5888 if self.op.readd:
5889 new_node.drained = new_node.offline = False # pylint: disable=W0201
5890 self.LogInfo("Readding a node, the offline/drained flags were reset")
5891 # if we demote the node, we do cleanup later in the procedure
5892 new_node.master_candidate = self.master_candidate
5893 if self.changed_primary_ip:
5894 new_node.primary_ip = self.op.primary_ip
5896 # copy the master/vm_capable flags
5897 for attr in self._NFLAGS:
5898 setattr(new_node, attr, getattr(self.op, attr))
5900 # notify the user about any possible mc promotion
5901 if new_node.master_candidate:
5902 self.LogInfo("Node will be a master candidate")
5904 if self.op.ndparams:
5905 new_node.ndparams = self.op.ndparams
5906 else:
5907 new_node.ndparams = {}
5909 if self.op.hv_state:
5910 new_node.hv_state_static = self.new_hv_state
5912 if self.op.disk_state:
5913 new_node.disk_state_static = self.new_disk_state
5915 # Add node to our /etc/hosts, and add key to known_hosts
5916 if self.cfg.GetClusterInfo().modify_etc_hosts:
5917 master_node = self.cfg.GetMasterNode()
5918 result = self.rpc.call_etc_hosts_modify(master_node,
5919 constants.ETC_HOSTS_ADD,
5922 result.Raise("Can't update hosts file with new host data")
5924 if new_node.secondary_ip != new_node.primary_ip:
5925 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5928 node_verify_list = [self.cfg.GetMasterNode()]
5929 node_verify_param = {
5930 constants.NV_NODELIST: ([node], {}),
5931 # TODO: do a node-net-test as well?
5934 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5935 self.cfg.GetClusterName())
5936 for verifier in node_verify_list:
5937 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5938 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5940 for failed in nl_payload:
5941 feedback_fn("ssh/hostname verification failed"
5942 " (checking from %s): %s" %
5943 (verifier, nl_payload[failed]))
5944 raise errors.OpExecError("ssh/hostname verification failed")
5947 _RedistributeAncillaryFiles(self)
5948 self.context.ReaddNode(new_node)
5949 # make sure we redistribute the config
5950 self.cfg.Update(new_node, feedback_fn)
5951 # and make sure the new node will not have old files around
5952 if not new_node.master_candidate:
5953 result = self.rpc.call_node_demote_from_mc(new_node.name)
5954 msg = result.fail_msg
5956 self.LogWarning("Node failed to demote itself from master"
5957 " candidate status: %s" % msg)
5959 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5960 additional_vm=self.op.vm_capable)
5961 self.context.AddNode(new_node, self.proc.GetECId())
5964 class LUNodeSetParams(LogicalUnit):
5965 """Modifies the parameters of a node.
5967 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5968 to the node role (as _ROLE_*)
5969 @cvar _R2F: a dictionary from node role to tuples of flags
5970 @cvar _FLAGS: a list of attribute names corresponding to the flags
5973 HPATH = "node-modify"
5974 HTYPE = constants.HTYPE_NODE
5976 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5978 (True, False, False): _ROLE_CANDIDATE,
5979 (False, True, False): _ROLE_DRAINED,
5980 (False, False, True): _ROLE_OFFLINE,
5981 (False, False, False): _ROLE_REGULAR,
5983 _R2F = dict((v, k) for k, v in _F2R.items())
5984 _FLAGS = ["master_candidate", "drained", "offline"]
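# Illustrative sketch (not part of the original module): _F2R maps a
# (master_candidate, drained, offline) flag tuple to a single role constant
# and _R2F is its inverse, e.g.
#   role = LUNodeSetParams._F2R[(True, False, False)]   # == _ROLE_CANDIDATE
#   assert LUNodeSetParams._R2F[role] == (True, False, False)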
5986 def CheckArguments(self):
5987 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5988 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5989 self.op.master_capable, self.op.vm_capable,
5990 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5992 if all_mods.count(None) == len(all_mods):
5993 raise errors.OpPrereqError("Please pass at least one modification",
5995 if all_mods.count(True) > 1:
5996 raise errors.OpPrereqError("Can't set the node into more than one"
5997 " state at the same time",
6000 # Boolean value that tells us whether we might be demoting from MC
6001 self.might_demote = (self.op.master_candidate is False or
6002 self.op.offline is True or
6003 self.op.drained is True or
6004 self.op.master_capable is False)
6006 if self.op.secondary_ip:
6007 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6008 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6009 " address" % self.op.secondary_ip,
6012 self.lock_all = self.op.auto_promote and self.might_demote
6013 self.lock_instances = self.op.secondary_ip is not None
6015 def _InstanceFilter(self, instance):
6016 """Filter for getting affected instances.
6019 return (instance.disk_template in constants.DTS_INT_MIRROR and
6020 self.op.node_name in instance.all_nodes)
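# Descriptive note: this filter is handed to cfg.GetInstancesInfoByFilter in
# ExpandNames/CheckPrereq below, so only instances that use an internally
# mirrored disk template (e.g. DRBD) and live on the node being modified are
# considered when its secondary IP is changed.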
6022 def ExpandNames(self):
6024 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6026 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6028 # Since modifying a node can have severe effects on currently running
6029 # operations the resource lock is at least acquired in shared mode
6030 self.needed_locks[locking.LEVEL_NODE_RES] = \
6031 self.needed_locks[locking.LEVEL_NODE]
6033 # Get node resource and instance locks in shared mode; they are not used
6034 # for anything but read-only access
6035 self.share_locks[locking.LEVEL_NODE_RES] = 1
6036 self.share_locks[locking.LEVEL_INSTANCE] = 1
6038 if self.lock_instances:
6039 self.needed_locks[locking.LEVEL_INSTANCE] = \
6040 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6042 def BuildHooksEnv(self):
6045 This runs on the master node.
6049 "OP_TARGET": self.op.node_name,
6050 "MASTER_CANDIDATE": str(self.op.master_candidate),
6051 "OFFLINE": str(self.op.offline),
6052 "DRAINED": str(self.op.drained),
6053 "MASTER_CAPABLE": str(self.op.master_capable),
6054 "VM_CAPABLE": str(self.op.vm_capable),
6057 def BuildHooksNodes(self):
6058 """Build hooks nodes.
6061 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6064 def CheckPrereq(self):
6065 """Check prerequisites.
6067 This only checks the instance list against the existing names.
6070 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6072 if self.lock_instances:
6073 affected_instances = \
6074 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6076 # Verify instance locks
6077 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6078 wanted_instances = frozenset(affected_instances.keys())
6079 if wanted_instances - owned_instances:
6080 raise errors.OpPrereqError("Instances affected by changing node %s's"
6081 " secondary IP address have changed since"
6082 " locks were acquired, wanted '%s', have"
6083 " '%s'; retry the operation" %
6085 utils.CommaJoin(wanted_instances),
6086 utils.CommaJoin(owned_instances)),
6088 else:
6089 affected_instances = None
6091 if (self.op.master_candidate is not None or
6092 self.op.drained is not None or
6093 self.op.offline is not None):
6094 # we can't change the master's node flags
6095 if self.op.node_name == self.cfg.GetMasterNode():
6096 raise errors.OpPrereqError("The master role can be changed"
6097 " only via master-failover",
6100 if self.op.master_candidate and not node.master_capable:
6101 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6102 " it a master candidate" % node.name,
6105 if self.op.vm_capable is False:
6106 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6108 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6109 " the vm_capable flag" % node.name,
6112 if node.master_candidate and self.might_demote and not self.lock_all:
6113 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6114 # check if after removing the current node, we're missing master
6116 (mc_remaining, mc_should, _) = \
6117 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6118 if mc_remaining < mc_should:
6119 raise errors.OpPrereqError("Not enough master candidates, please"
6120 " pass auto promote option to allow"
6121 " promotion (--auto-promote or RAPI"
6122 " auto_promote=True)", errors.ECODE_STATE)
6124 self.old_flags = old_flags = (node.master_candidate,
6125 node.drained, node.offline)
6126 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6127 self.old_role = old_role = self._F2R[old_flags]
6129 # Check for ineffective changes
6130 for attr in self._FLAGS:
6131 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6132 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6133 setattr(self.op, attr, None)
6135 # Past this point, any flag change to False means a transition
6136 # away from the respective state, as only real changes are kept
6138 # TODO: We might query the real power state if it supports OOB
6139 if _SupportsOob(self.cfg, node):
6140 if self.op.offline is False and not (node.powered or
6141 self.op.powered is True):
6142 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6143 " offline status can be reset") %
6144 self.op.node_name, errors.ECODE_STATE)
6145 elif self.op.powered is not None:
6146 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6147 " as it does not support out-of-band"
6148 " handling") % self.op.node_name,
6151 # If we're being deofflined/drained, we'll MC ourself if needed
6152 if (self.op.drained is False or self.op.offline is False or
6153 (self.op.master_capable and not node.master_capable)):
6154 if _DecideSelfPromotion(self):
6155 self.op.master_candidate = True
6156 self.LogInfo("Auto-promoting node to master candidate")
6158 # If we're no longer master capable, we'll demote ourselves from MC
6159 if self.op.master_capable is False and node.master_candidate:
6160 self.LogInfo("Demoting from master candidate")
6161 self.op.master_candidate = False
6164 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6165 if self.op.master_candidate:
6166 new_role = self._ROLE_CANDIDATE
6167 elif self.op.drained:
6168 new_role = self._ROLE_DRAINED
6169 elif self.op.offline:
6170 new_role = self._ROLE_OFFLINE
6171 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6172 # False is still in new flags, which means we're un-setting (the
6173 # current) role
6174 new_role = self._ROLE_REGULAR
6175 else: # no new flags, nothing, keep old role
6178 self.new_role = new_role
6180 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6181 # Trying to transition out of offline status
6182 result = self.rpc.call_version([node.name])[node.name]
6184 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6185 " to report its version: %s" %
6186 (node.name, result.fail_msg),
6189 self.LogWarning("Transitioning node from offline to online state"
6190 " without using re-add. Please make sure the node"
6193 # When changing the secondary ip, verify if this is a single-homed to
6194 # multi-homed transition or vice versa, and apply the relevant
6195 # constraints.
6196 if self.op.secondary_ip:
6197 # Ok even without locking, because this can't be changed by any LU
6198 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6199 master_singlehomed = master.secondary_ip == master.primary_ip
6200 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6201 if self.op.force and node.name == master.name:
6202 self.LogWarning("Transitioning from single-homed to multi-homed"
6203 " cluster; all nodes will require a secondary IP"
6206 raise errors.OpPrereqError("Changing the secondary ip on a"
6207 " single-homed cluster requires the"
6208 " --force option to be passed, and the"
6209 " target node to be the master",
6211 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6212 if self.op.force and node.name == master.name:
6213 self.LogWarning("Transitioning from multi-homed to single-homed"
6214 " cluster; secondary IP addresses will have to be"
6217 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6218 " same as the primary IP on a multi-homed"
6219 " cluster, unless the --force option is"
6220 " passed, and the target node is the"
6221 " master", errors.ECODE_INVAL)
6223 assert not (frozenset(affected_instances) -
6224 self.owned_locks(locking.LEVEL_INSTANCE))
6227 if affected_instances:
6228 msg = ("Cannot change secondary IP address: offline node has"
6229 " instances (%s) configured to use it" %
6230 utils.CommaJoin(affected_instances.keys()))
6231 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6233 # On online nodes, check that no instances are running, and that
6234 # the node has the new ip and we can reach it.
6235 for instance in affected_instances.values():
6236 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6237 msg="cannot change secondary ip")
6239 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6240 if master.name != node.name:
6241 # check reachability from master secondary ip to new secondary ip
6242 if not netutils.TcpPing(self.op.secondary_ip,
6243 constants.DEFAULT_NODED_PORT,
6244 source=master.secondary_ip):
6245 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6246 " based ping to node daemon port",
6247 errors.ECODE_ENVIRON)
6249 if self.op.ndparams:
6250 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6251 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6252 self.new_ndparams = new_ndparams
6254 if self.op.hv_state:
6255 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6256 self.node.hv_state_static)
6258 if self.op.disk_state:
6259 self.new_disk_state = \
6260 _MergeAndVerifyDiskState(self.op.disk_state,
6261 self.node.disk_state_static)
6263 def Exec(self, feedback_fn):
6268 old_role = self.old_role
6269 new_role = self.new_role
6273 if self.op.ndparams:
6274 node.ndparams = self.new_ndparams
6276 if self.op.powered is not None:
6277 node.powered = self.op.powered
6279 if self.op.hv_state:
6280 node.hv_state_static = self.new_hv_state
6282 if self.op.disk_state:
6283 node.disk_state_static = self.new_disk_state
6285 for attr in ["master_capable", "vm_capable"]:
6286 val = getattr(self.op, attr)
6288 setattr(node, attr, val)
6289 result.append((attr, str(val)))
6291 if new_role != old_role:
6292 # Tell the node to demote itself, if no longer MC and not offline
6293 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6294 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6296 self.LogWarning("Node failed to demote itself: %s", msg)
6298 new_flags = self._R2F[new_role]
6299 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6301 result.append((desc, str(nf)))
6302 (node.master_candidate, node.drained, node.offline) = new_flags
6304 # we locked all nodes, we adjust the CP before updating this node
6305 if self.lock_all:
6306 _AdjustCandidatePool(self, [node.name])
6308 if self.op.secondary_ip:
6309 node.secondary_ip = self.op.secondary_ip
6310 result.append(("secondary_ip", self.op.secondary_ip))
6312 # this will trigger configuration file update, if needed
6313 self.cfg.Update(node, feedback_fn)
6315 # this will trigger job queue propagation or cleanup if the mc
6316 # flag changed
6317 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6318 self.context.ReaddNode(node)
6323 class LUNodePowercycle(NoHooksLU):
6324 """Powercycles a node.
6329 def CheckArguments(self):
6330 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6331 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6332 raise errors.OpPrereqError("The node is the master and the force"
6333 " parameter was not set",
6336 def ExpandNames(self):
6337 """Locking for PowercycleNode.
6339 This is a last-resort option and shouldn't block on other
6340 jobs. Therefore, we grab no locks.
6343 self.needed_locks = {}
6345 def Exec(self, feedback_fn):
6349 result = self.rpc.call_node_powercycle(self.op.node_name,
6350 self.cfg.GetHypervisorType())
6351 result.Raise("Failed to schedule the reboot")
6352 return result.payload
6355 class LUClusterQuery(NoHooksLU):
6356 """Query cluster configuration.
6361 def ExpandNames(self):
6362 self.needed_locks = {}
6364 def Exec(self, feedback_fn):
6365 """Return cluster config.
6368 cluster = self.cfg.GetClusterInfo()
6371 # Filter just for enabled hypervisors
6372 for os_name, hv_dict in cluster.os_hvp.items():
6373 os_hvp[os_name] = {}
6374 for hv_name, hv_params in hv_dict.items():
6375 if hv_name in cluster.enabled_hypervisors:
6376 os_hvp[os_name][hv_name] = hv_params
6378 # Convert ip_family to ip_version
6379 primary_ip_version = constants.IP4_VERSION
6380 if cluster.primary_ip_family == netutils.IP6Address.family:
6381 primary_ip_version = constants.IP6_VERSION
6384 "software_version": constants.RELEASE_VERSION,
6385 "protocol_version": constants.PROTOCOL_VERSION,
6386 "config_version": constants.CONFIG_VERSION,
6387 "os_api_version": max(constants.OS_API_VERSIONS),
6388 "export_version": constants.EXPORT_VERSION,
6389 "architecture": runtime.GetArchInfo(),
6390 "name": cluster.cluster_name,
6391 "master": cluster.master_node,
6392 "default_hypervisor": cluster.primary_hypervisor,
6393 "enabled_hypervisors": cluster.enabled_hypervisors,
6394 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6395 for hypervisor_name in cluster.enabled_hypervisors]),
6397 "beparams": cluster.beparams,
6398 "osparams": cluster.osparams,
6399 "ipolicy": cluster.ipolicy,
6400 "nicparams": cluster.nicparams,
6401 "ndparams": cluster.ndparams,
6402 "diskparams": cluster.diskparams,
6403 "candidate_pool_size": cluster.candidate_pool_size,
6404 "master_netdev": cluster.master_netdev,
6405 "master_netmask": cluster.master_netmask,
6406 "use_external_mip_script": cluster.use_external_mip_script,
6407 "volume_group_name": cluster.volume_group_name,
6408 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6409 "file_storage_dir": cluster.file_storage_dir,
6410 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6411 "maintain_node_health": cluster.maintain_node_health,
6412 "ctime": cluster.ctime,
6413 "mtime": cluster.mtime,
6414 "uuid": cluster.uuid,
6415 "tags": list(cluster.GetTags()),
6416 "uid_pool": cluster.uid_pool,
6417 "default_iallocator": cluster.default_iallocator,
6418 "reserved_lvs": cluster.reserved_lvs,
6419 "primary_ip_version": primary_ip_version,
6420 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6421 "hidden_os": cluster.hidden_os,
6422 "blacklisted_os": cluster.blacklisted_os,
6428 class LUClusterConfigQuery(NoHooksLU):
6429 """Return configuration values.
6434 def CheckArguments(self):
6435 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6437 def ExpandNames(self):
6438 self.cq.ExpandNames(self)
6440 def DeclareLocks(self, level):
6441 self.cq.DeclareLocks(self, level)
6443 def Exec(self, feedback_fn):
6444 result = self.cq.OldStyleQuery(self)
6446 assert len(result) == 1
6451 class _ClusterQuery(_QueryBase):
6452 FIELDS = query.CLUSTER_FIELDS
6454 #: Do not sort (there is only one item)
6457 def ExpandNames(self, lu):
6458 lu.needed_locks = {}
6460 # The following variables interact with _QueryBase._GetNames
6461 self.wanted = locking.ALL_SET
6462 self.do_locking = self.use_locking
6465 raise errors.OpPrereqError("Can not use locking for cluster queries",
6468 def DeclareLocks(self, lu, level):
6471 def _GetQueryData(self, lu):
6472 """Computes the cluster data for the query.
6475 # Locking is not used
6476 assert not (compat.any(lu.glm.is_owned(level)
6477 for level in locking.LEVELS
6478 if level != locking.LEVEL_CLUSTER) or
6479 self.do_locking or self.use_locking)
6481 if query.CQ_CONFIG in self.requested_data:
6482 cluster = lu.cfg.GetClusterInfo()
6484 cluster = NotImplemented
6486 if query.CQ_QUEUE_DRAINED in self.requested_data:
6487 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6489 drain_flag = NotImplemented
6491 if query.CQ_WATCHER_PAUSE in self.requested_data:
6492 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6494 watcher_pause = NotImplemented
6496 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
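# Descriptive note: data that was not requested is passed to
# query.ClusterQueryData as the NotImplemented sentinel (see the branches
# above), presumably so the query layer can tell "not requested" apart from
# an empty or false value.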
6499 class LUInstanceActivateDisks(NoHooksLU):
6500 """Bring up an instance's disks.
6505 def ExpandNames(self):
6506 self._ExpandAndLockInstance()
6507 self.needed_locks[locking.LEVEL_NODE] = []
6508 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6510 def DeclareLocks(self, level):
6511 if level == locking.LEVEL_NODE:
6512 self._LockInstancesNodes()
6514 def CheckPrereq(self):
6515 """Check prerequisites.
6517 This checks that the instance is in the cluster.
6520 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6521 assert self.instance is not None, \
6522 "Cannot retrieve locked instance %s" % self.op.instance_name
6523 _CheckNodeOnline(self, self.instance.primary_node)
6525 def Exec(self, feedback_fn):
6526 """Activate the disks.
6529 disks_ok, disks_info = \
6530 _AssembleInstanceDisks(self, self.instance,
6531 ignore_size=self.op.ignore_size)
6533 raise errors.OpExecError("Cannot activate block devices")
6535 if self.op.wait_for_sync:
6536 if not _WaitForSync(self, self.instance):
6537 raise errors.OpExecError("Some disks of the instance are degraded!")
6542 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6544 """Prepare the block devices for an instance.
6546 This sets up the block devices on all nodes.
6548 @type lu: L{LogicalUnit}
6549 @param lu: the logical unit on whose behalf we execute
6550 @type instance: L{objects.Instance}
6551 @param instance: the instance for whose disks we assemble
6552 @type disks: list of L{objects.Disk} or None
6553 @param disks: which disks to assemble (or all, if None)
6554 @type ignore_secondaries: boolean
6555 @param ignore_secondaries: if true, errors on secondary nodes
6556 won't result in an error return from the function
6557 @type ignore_size: boolean
6558 @param ignore_size: if true, the current known size of the disk
6559 will not be used during the disk activation, useful for cases
6560 when the size is wrong
6561 @return: a tuple (disks_ok, device_info); disks_ok is False if the operation
6562 failed, and device_info is a list of (node, instance_visible_name,
6563 node_visible_name) tuples mapping node devices to instance devices
6568 iname = instance.name
6569 disks = _ExpandCheckDisks(instance, disks)
6571 # With the two-pass mechanism we try to reduce the window of
6572 # opportunity for the race condition of switching DRBD to primary
6573 # before handshaking occurred, but we do not eliminate it
6575 # The proper fix would be to wait (with some limits) until the
6576 # connection has been made and drbd transitions from WFConnection
6577 # into any other network-connected state (Connected, SyncTarget,
6578 # SyncSource, etc.)
6580 # 1st pass, assemble on all nodes in secondary mode
6581 for idx, inst_disk in enumerate(disks):
6582 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6583 if ignore_size:
6584 node_disk = node_disk.Copy()
6585 node_disk.UnsetSize()
6586 lu.cfg.SetDiskID(node_disk, node)
6587 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6589 msg = result.fail_msg
6591 is_offline_secondary = (node in instance.secondary_nodes and
6593 lu.LogWarning("Could not prepare block device %s on node %s"
6594 " (is_primary=False, pass=1): %s",
6595 inst_disk.iv_name, node, msg)
6596 if not (ignore_secondaries or is_offline_secondary):
6599 # FIXME: race condition on drbd migration to primary
6601 # 2nd pass, do only the primary node
6602 for idx, inst_disk in enumerate(disks):
6605 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6606 if node != instance.primary_node:
6607 continue
6608 if ignore_size:
6609 node_disk = node_disk.Copy()
6610 node_disk.UnsetSize()
6611 lu.cfg.SetDiskID(node_disk, node)
6612 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6614 msg = result.fail_msg
6616 lu.LogWarning("Could not prepare block device %s on node %s"
6617 " (is_primary=True, pass=2): %s",
6618 inst_disk.iv_name, node, msg)
6621 dev_path = result.payload
6623 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6625 # leave the disks configured for the primary node
6626 # this is a workaround that would be fixed better by
6627 # improving the logical/physical id handling
6629 lu.cfg.SetDiskID(disk, instance.primary_node)
6631 return disks_ok, device_info
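# Illustrative sketch (hypothetical node name and device path): on success a
# caller sees something like
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   # disks_ok    == True
#   # device_info == [("node1.example.com", "disk/0", "/dev/drbd0")]
# where each tuple is (node, instance-visible name, node-visible path), as
# appended during the second (primary-node) pass above.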
6634 def _StartInstanceDisks(lu, instance, force):
6635 """Start the disks of an instance.
6638 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6639 ignore_secondaries=force)
6641 _ShutdownInstanceDisks(lu, instance)
6642 if force is not None and not force:
6644 hint=("If the message above refers to a secondary node,"
6645 " you can retry the operation using '--force'"))
6646 raise errors.OpExecError("Disk consistency error")
6649 class LUInstanceDeactivateDisks(NoHooksLU):
6650 """Shutdown an instance's disks.
6655 def ExpandNames(self):
6656 self._ExpandAndLockInstance()
6657 self.needed_locks[locking.LEVEL_NODE] = []
6658 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6660 def DeclareLocks(self, level):
6661 if level == locking.LEVEL_NODE:
6662 self._LockInstancesNodes()
6664 def CheckPrereq(self):
6665 """Check prerequisites.
6667 This checks that the instance is in the cluster.
6670 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6671 assert self.instance is not None, \
6672 "Cannot retrieve locked instance %s" % self.op.instance_name
6674 def Exec(self, feedback_fn):
6675 """Deactivate the disks
6678 instance = self.instance
6680 _ShutdownInstanceDisks(self, instance)
6682 _SafeShutdownInstanceDisks(self, instance)
6685 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6686 """Shutdown block devices of an instance.
6688 This function checks if an instance is running, before calling
6689 _ShutdownInstanceDisks.
6692 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6693 _ShutdownInstanceDisks(lu, instance, disks=disks)
6696 def _ExpandCheckDisks(instance, disks):
6697 """Return the instance disks selected by the disks list
6699 @type disks: list of L{objects.Disk} or None
6700 @param disks: selected disks
6701 @rtype: list of L{objects.Disk}
6702 @return: selected instance disks to act on
6705 if disks is None:
6706 return instance.disks
6708 if not set(disks).issubset(instance.disks):
6709 raise errors.ProgrammerError("Can only act on disks belonging to the"
6714 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6715 """Shutdown block devices of an instance.
6717 This does the shutdown on all nodes of the instance.
6719 If ignore_primary is false, errors on the primary node are not
6720 ignored and the shutdown is reported as failed.
6724 disks = _ExpandCheckDisks(instance, disks)
6727 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6728 lu.cfg.SetDiskID(top_disk, node)
6729 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6730 msg = result.fail_msg
6732 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6733 disk.iv_name, node, msg)
6734 if ((node == instance.primary_node and not ignore_primary) or
6735 (node != instance.primary_node and not result.offline)):
6740 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6741 """Checks if a node has enough free memory.
6743 This function checks if a given node has the needed amount of free
6744 memory. In case the node has less memory or we cannot get the
6745 information from the node, this function raises an OpPrereqError
6746 exception.
6748 @type lu: C{LogicalUnit}
6749 @param lu: a logical unit from which we get configuration data
6751 @param node: the node to check
6752 @type reason: C{str}
6753 @param reason: string to use in the error message
6754 @type requested: C{int}
6755 @param requested: the amount of memory in MiB to check for
6756 @type hypervisor_name: C{str}
6757 @param hypervisor_name: the hypervisor to ask for memory stats
6759 @return: node current free memory
6760 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6761 we cannot check the node
6764 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6765 nodeinfo[node].Raise("Can't get data from node %s" % node,
6766 prereq=True, ecode=errors.ECODE_ENVIRON)
6767 (_, _, (hv_info, )) = nodeinfo[node].payload
6769 free_mem = hv_info.get("memory_free", None)
6770 if not isinstance(free_mem, int):
6771 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6772 " was '%s'" % (node, free_mem),
6773 errors.ECODE_ENVIRON)
6774 if requested > free_mem:
6775 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6776 " needed %s MiB, available %s MiB" %
6777 (node, reason, requested, free_mem),
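# Descriptive note: a typical caller passes the instance's primary node, a
# short reason string, the required memory in MiB and the hypervisor name;
# see the call in LUInstanceStartup.CheckPrereq further below for a concrete
# example.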
6782 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6783 """Checks if nodes have enough free disk space in all the VGs.
6785 This function checks if all given nodes have the needed amount of
6786 free disk. In case any node has less disk or we cannot get the
6787 information from the node, this function raises an OpPrereqError
6788 exception.
6790 @type lu: C{LogicalUnit}
6791 @param lu: a logical unit from which we get configuration data
6792 @type nodenames: C{list}
6793 @param nodenames: the list of node names to check
6794 @type req_sizes: C{dict}
6795 @param req_sizes: the hash of vg and corresponding amount of disk in
6797 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6798 or we cannot check the node
6801 for vg, req_size in req_sizes.items():
6802 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
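# Illustrative sketch (hypothetical VG name and size): req_sizes maps volume
# group names to the space required in MiB, so
#   _CheckNodesFreeDiskPerVG(lu, ["node1.example.com"], {"xenvg": 10240})
# checks that node1 has at least 10 GiB free in the "xenvg" volume group.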
6805 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6806 """Checks if nodes have enough free disk space in the specified VG.
6808 This function checks if all given nodes have the needed amount of
6809 free disk. In case any node has less disk or we cannot get the
6810 information from the node, this function raises an OpPrereqError
6811 exception.
6813 @type lu: C{LogicalUnit}
6814 @param lu: a logical unit from which we get configuration data
6815 @type nodenames: C{list}
6816 @param nodenames: the list of node names to check
6818 @param vg: the volume group to check
6819 @type requested: C{int}
6820 @param requested: the amount of disk in MiB to check for
6821 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6822 or we cannot check the node
6825 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6826 for node in nodenames:
6827 info = nodeinfo[node]
6828 info.Raise("Cannot get current information from node %s" % node,
6829 prereq=True, ecode=errors.ECODE_ENVIRON)
6830 (_, (vg_info, ), _) = info.payload
6831 vg_free = vg_info.get("vg_free", None)
6832 if not isinstance(vg_free, int):
6833 raise errors.OpPrereqError("Can't compute free disk space on node"
6834 " %s for vg %s, result was '%s'" %
6835 (node, vg, vg_free), errors.ECODE_ENVIRON)
6836 if requested > vg_free:
6837 raise errors.OpPrereqError("Not enough disk space on target node %s"
6838 " vg %s: required %d MiB, available %d MiB" %
6839 (node, vg, requested, vg_free),
6843 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6844 """Checks if nodes have enough physical CPUs
6846 This function checks if all given nodes have the needed number of
6847 physical CPUs. In case any node has fewer CPUs or we cannot get the
6848 information from the node, this function raises an OpPrereqError
6849 exception.
6851 @type lu: C{LogicalUnit}
6852 @param lu: a logical unit from which we get configuration data
6853 @type nodenames: C{list}
6854 @param nodenames: the list of node names to check
6855 @type requested: C{int}
6856 @param requested: the minimum acceptable number of physical CPUs
6857 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6858 or we cannot check the node
6861 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6862 for node in nodenames:
6863 info = nodeinfo[node]
6864 info.Raise("Cannot get current information from node %s" % node,
6865 prereq=True, ecode=errors.ECODE_ENVIRON)
6866 (_, _, (hv_info, )) = info.payload
6867 num_cpus = hv_info.get("cpu_total", None)
6868 if not isinstance(num_cpus, int):
6869 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6870 " on node %s, result was '%s'" %
6871 (node, num_cpus), errors.ECODE_ENVIRON)
6872 if requested > num_cpus:
6873 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6874 "required" % (node, num_cpus, requested),
6878 class LUInstanceStartup(LogicalUnit):
6879 """Starts an instance.
6882 HPATH = "instance-start"
6883 HTYPE = constants.HTYPE_INSTANCE
6886 def CheckArguments(self):
6888 if self.op.beparams:
6889 # fill the beparams dict
6890 objects.UpgradeBeParams(self.op.beparams)
6891 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6893 def ExpandNames(self):
6894 self._ExpandAndLockInstance()
6895 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6897 def DeclareLocks(self, level):
6898 if level == locking.LEVEL_NODE_RES:
6899 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6901 def BuildHooksEnv(self):
6904 This runs on master, primary and secondary nodes of the instance.
6908 "FORCE": self.op.force,
6911 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6915 def BuildHooksNodes(self):
6916 """Build hooks nodes.
6919 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6922 def CheckPrereq(self):
6923 """Check prerequisites.
6925 This checks that the instance is in the cluster.
6928 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6929 assert self.instance is not None, \
6930 "Cannot retrieve locked instance %s" % self.op.instance_name
6933 if self.op.hvparams:
6934 # check hypervisor parameter syntax (locally)
6935 cluster = self.cfg.GetClusterInfo()
6936 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6937 filled_hvp = cluster.FillHV(instance)
6938 filled_hvp.update(self.op.hvparams)
6939 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6940 hv_type.CheckParameterSyntax(filled_hvp)
6941 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6943 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6945 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6947 if self.primary_offline and self.op.ignore_offline_nodes:
6948 self.LogWarning("Ignoring offline primary node")
6950 if self.op.hvparams or self.op.beparams:
6951 self.LogWarning("Overridden parameters are ignored")
6953 _CheckNodeOnline(self, instance.primary_node)
6955 bep = self.cfg.GetClusterInfo().FillBE(instance)
6956 bep.update(self.op.beparams)
6958 # check bridges existence
6959 _CheckInstanceBridgesExist(self, instance)
6961 remote_info = self.rpc.call_instance_info(instance.primary_node,
6963 instance.hypervisor)
6964 remote_info.Raise("Error checking node %s" % instance.primary_node,
6965 prereq=True, ecode=errors.ECODE_ENVIRON)
6966 if not remote_info.payload: # not running already
6967 _CheckNodeFreeMemory(self, instance.primary_node,
6968 "starting instance %s" % instance.name,
6969 bep[constants.BE_MINMEM], instance.hypervisor)
6971 def Exec(self, feedback_fn):
6972 """Start the instance.
6975 instance = self.instance
6976 force = self.op.force
6978 if not self.op.no_remember:
6979 self.cfg.MarkInstanceUp(instance.name)
6981 if self.primary_offline:
6982 assert self.op.ignore_offline_nodes
6983 self.LogInfo("Primary node offline, marked instance as started")
6985 node_current = instance.primary_node
6987 _StartInstanceDisks(self, instance, force)
6990 self.rpc.call_instance_start(node_current,
6991 (instance, self.op.hvparams,
6993 self.op.startup_paused)
6994 msg = result.fail_msg
6996 _ShutdownInstanceDisks(self, instance)
6997 raise errors.OpExecError("Could not start instance: %s" % msg)
7000 class LUInstanceReboot(LogicalUnit):
7001 """Reboot an instance.
7004 HPATH = "instance-reboot"
7005 HTYPE = constants.HTYPE_INSTANCE
7008 def ExpandNames(self):
7009 self._ExpandAndLockInstance()
7011 def BuildHooksEnv(self):
7014 This runs on master, primary and secondary nodes of the instance.
7018 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7019 "REBOOT_TYPE": self.op.reboot_type,
7020 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7023 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7027 def BuildHooksNodes(self):
7028 """Build hooks nodes.
7031 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7034 def CheckPrereq(self):
7035 """Check prerequisites.
7037 This checks that the instance is in the cluster.
7040 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7041 assert self.instance is not None, \
7042 "Cannot retrieve locked instance %s" % self.op.instance_name
7043 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7044 _CheckNodeOnline(self, instance.primary_node)
7046 # check bridges existence
7047 _CheckInstanceBridgesExist(self, instance)
7049 def Exec(self, feedback_fn):
7050 """Reboot the instance.
7053 instance = self.instance
7054 ignore_secondaries = self.op.ignore_secondaries
7055 reboot_type = self.op.reboot_type
7057 remote_info = self.rpc.call_instance_info(instance.primary_node,
7059 instance.hypervisor)
7060 remote_info.Raise("Error checking node %s" % instance.primary_node)
7061 instance_running = bool(remote_info.payload)
7063 node_current = instance.primary_node
7065 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7066 constants.INSTANCE_REBOOT_HARD]:
7067 for disk in instance.disks:
7068 self.cfg.SetDiskID(disk, node_current)
7069 result = self.rpc.call_instance_reboot(node_current, instance,
7071 self.op.shutdown_timeout)
7072 result.Raise("Could not reboot instance")
7074 if instance_running:
7075 result = self.rpc.call_instance_shutdown(node_current, instance,
7076 self.op.shutdown_timeout)
7077 result.Raise("Could not shutdown instance for full reboot")
7078 _ShutdownInstanceDisks(self, instance)
7079 else:
7080 self.LogInfo("Instance %s was already stopped, starting now",
7082 _StartInstanceDisks(self, instance, ignore_secondaries)
7083 result = self.rpc.call_instance_start(node_current,
7084 (instance, None, None), False)
7085 msg = result.fail_msg
7087 _ShutdownInstanceDisks(self, instance)
7088 raise errors.OpExecError("Could not start instance for"
7089 " full reboot: %s" % msg)
7091 self.cfg.MarkInstanceUp(instance.name)
7094 class LUInstanceShutdown(LogicalUnit):
7095 """Shutdown an instance.
7098 HPATH = "instance-stop"
7099 HTYPE = constants.HTYPE_INSTANCE
7102 def ExpandNames(self):
7103 self._ExpandAndLockInstance()
7105 def BuildHooksEnv(self):
7108 This runs on master, primary and secondary nodes of the instance.
7111 env = _BuildInstanceHookEnvByObject(self, self.instance)
7112 env["TIMEOUT"] = self.op.timeout
7115 def BuildHooksNodes(self):
7116 """Build hooks nodes.
7119 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7122 def CheckPrereq(self):
7123 """Check prerequisites.
7125 This checks that the instance is in the cluster.
7128 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7129 assert self.instance is not None, \
7130 "Cannot retrieve locked instance %s" % self.op.instance_name
7132 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7134 self.primary_offline = \
7135 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7137 if self.primary_offline and self.op.ignore_offline_nodes:
7138 self.LogWarning("Ignoring offline primary node")
7140 _CheckNodeOnline(self, self.instance.primary_node)
7142 def Exec(self, feedback_fn):
7143 """Shutdown the instance.
7146 instance = self.instance
7147 node_current = instance.primary_node
7148 timeout = self.op.timeout
7150 if not self.op.no_remember:
7151 self.cfg.MarkInstanceDown(instance.name)
7153 if self.primary_offline:
7154 assert self.op.ignore_offline_nodes
7155 self.LogInfo("Primary node offline, marked instance as stopped")
7157 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7158 msg = result.fail_msg
7160 self.LogWarning("Could not shutdown instance: %s", msg)
7162 _ShutdownInstanceDisks(self, instance)
7165 class LUInstanceReinstall(LogicalUnit):
7166 """Reinstall an instance.
7169 HPATH = "instance-reinstall"
7170 HTYPE = constants.HTYPE_INSTANCE
7173 def ExpandNames(self):
7174 self._ExpandAndLockInstance()
7176 def BuildHooksEnv(self):
7179 This runs on master, primary and secondary nodes of the instance.
7182 return _BuildInstanceHookEnvByObject(self, self.instance)
7184 def BuildHooksNodes(self):
7185 """Build hooks nodes.
7188 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7191 def CheckPrereq(self):
7192 """Check prerequisites.
7194 This checks that the instance is in the cluster and is not running.
7197 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7198 assert instance is not None, \
7199 "Cannot retrieve locked instance %s" % self.op.instance_name
7200 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7201 " offline, cannot reinstall")
7203 if instance.disk_template == constants.DT_DISKLESS:
7204 raise errors.OpPrereqError("Instance '%s' has no disks" %
7205 self.op.instance_name,
7207 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7209 if self.op.os_type is not None:
7211 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7212 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7213 instance_os = self.op.os_type
7215 instance_os = instance.os
7217 nodelist = list(instance.all_nodes)
7219 if self.op.osparams:
7220 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7221 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7222 self.os_inst = i_osdict # the new dict (without defaults)
7226 self.instance = instance
7228 def Exec(self, feedback_fn):
7229 """Reinstall the instance.
7232 inst = self.instance
7234 if self.op.os_type is not None:
7235 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7236 inst.os = self.op.os_type
7237 # Write to configuration
7238 self.cfg.Update(inst, feedback_fn)
7240 _StartInstanceDisks(self, inst, None)
7242 feedback_fn("Running the instance OS create scripts...")
7243 # FIXME: pass debug option from opcode to backend
7244 result = self.rpc.call_instance_os_add(inst.primary_node,
7245 (inst, self.os_inst), True,
7246 self.op.debug_level)
7247 result.Raise("Could not install OS for instance %s on node %s" %
7248 (inst.name, inst.primary_node))
7250 _ShutdownInstanceDisks(self, inst)
7253 class LUInstanceRecreateDisks(LogicalUnit):
7254 """Recreate an instance's missing disks.
7257 HPATH = "instance-recreate-disks"
7258 HTYPE = constants.HTYPE_INSTANCE
7261 _MODIFYABLE = frozenset([
7262 constants.IDISK_SIZE,
7263 constants.IDISK_MODE,
7266 # New or changed disk parameters may have different semantics
7267 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7268 constants.IDISK_ADOPT,
7270 # TODO: Implement support changing VG while recreating
7272 constants.IDISK_METAVG,
7275 def _RunAllocator(self):
7276 """Run the allocator based on input opcode.
7279 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7282 # The allocator should actually run in "relocate" mode, but current
7283 # allocators don't support relocating all the nodes of an instance at
7284 # the same time. As a workaround we use "allocate" mode, but this is
7285 # suboptimal for two reasons:
7286 # - The instance name passed to the allocator is present in the list of
7287 # existing instances, so there could be a conflict within the
7288 # internal structures of the allocator. This doesn't happen with the
7289 # current allocators, but it's a liability.
7290 # - The allocator counts the resources used by the instance twice: once
7291 # because the instance exists already, and once because it tries to
7292 # allocate a new instance.
7293 # The allocator could choose some of the nodes on which the instance is
7294 # running, but that's not a problem. If the instance nodes are broken,
7295 # they should already be marked as drained or offline, and hence
7296 # skipped by the allocator. If instance disks have been lost for other
7297 # reasons, then recreating the disks on the same nodes should be fine.
7298 disk_template = self.instance.disk_template
7299 spindle_use = be_full[constants.BE_SPINDLE_USE]
7300 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7301 disk_template=disk_template,
7302 tags=list(self.instance.GetTags()),
7303 os=self.instance.os,
7305 vcpus=be_full[constants.BE_VCPUS],
7306 memory=be_full[constants.BE_MAXMEM],
7307 spindle_use=spindle_use,
7308 disks=[{constants.IDISK_SIZE: d.size,
7309 constants.IDISK_MODE: d.mode}
7310 for d in self.instance.disks],
7311 hypervisor=self.instance.hypervisor)
7312 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7314 ial.Run(self.op.iallocator)
7316 assert req.RequiredNodes() == len(self.instance.all_nodes)
7319 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7320 " %s" % (self.op.iallocator, ial.info),
7323 self.op.nodes = ial.result
7324 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7325 self.op.instance_name, self.op.iallocator,
7326 utils.CommaJoin(ial.result))
7328 def CheckArguments(self):
7329 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7330 # Normalize and convert deprecated list of disk indices
7331 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
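# Illustrative example: the deprecated index-only form [2, 0] is normalized
# above into the (index, params) form [(0, {}), (2, {})] before validation.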
7333 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7335 raise errors.OpPrereqError("Some disks have been specified more than"
7336 " once: %s" % utils.CommaJoin(duplicates),
7339 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7340 # when neither iallocator nor nodes are specified
7341 if self.op.iallocator or self.op.nodes:
7342 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7344 for (idx, params) in self.op.disks:
7345 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7346 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7348 raise errors.OpPrereqError("Parameters for disk %s try to change"
7349 " unmodifiable parameter(s): %s" %
7350 (idx, utils.CommaJoin(unsupported)),
7353 def ExpandNames(self):
7354 self._ExpandAndLockInstance()
7355 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7357 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7358 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7360 self.needed_locks[locking.LEVEL_NODE] = []
7361 if self.op.iallocator:
7362 # iallocator will select a new node in the same group
7363 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7364 self.needed_locks[locking.LEVEL_NODE_RES] = []
7366 def DeclareLocks(self, level):
7367 if level == locking.LEVEL_NODEGROUP:
7368 assert self.op.iallocator is not None
7369 assert not self.op.nodes
7370 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7371 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7372 # Lock the primary group used by the instance optimistically; this
7373 # requires going via the node before it's locked, requiring
7374 # verification later on
7375 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7376 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7378 elif level == locking.LEVEL_NODE:
7379 # If an allocator is used, then we lock all the nodes in the current
7380 # instance group, as we don't know yet which ones will be selected;
7381 # if we replace the nodes without using an allocator, locks are
7382 # already declared in ExpandNames; otherwise, we need to lock all the
7383 # instance nodes for disk re-creation
7384 if self.op.iallocator:
7385 assert not self.op.nodes
7386 assert not self.needed_locks[locking.LEVEL_NODE]
7387 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7389 # Lock member nodes of the group of the primary node
7390 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7391 self.needed_locks[locking.LEVEL_NODE].extend(
7392 self.cfg.GetNodeGroup(group_uuid).members)
7393 elif not self.op.nodes:
7394 self._LockInstancesNodes(primary_only=False)
7395 elif level == locking.LEVEL_NODE_RES:
7397 self.needed_locks[locking.LEVEL_NODE_RES] = \
7398 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7400 def BuildHooksEnv(self):
7403 This runs on master, primary and secondary nodes of the instance.
7406 return _BuildInstanceHookEnvByObject(self, self.instance)
7408 def BuildHooksNodes(self):
7409 """Build hooks nodes.
7412 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7415 def CheckPrereq(self):
7416 """Check prerequisites.
7418 This checks that the instance is in the cluster and is not running.
7421 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7422 assert instance is not None, \
7423 "Cannot retrieve locked instance %s" % self.op.instance_name
7425 if len(self.op.nodes) != len(instance.all_nodes):
7426 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7427 " %d replacement nodes were specified" %
7428 (instance.name, len(instance.all_nodes),
7429 len(self.op.nodes)),
7431 assert instance.disk_template != constants.DT_DRBD8 or \
7432 len(self.op.nodes) == 2
7433 assert instance.disk_template != constants.DT_PLAIN or \
7434 len(self.op.nodes) == 1
7435 primary_node = self.op.nodes[0]
7437 primary_node = instance.primary_node
7438 if not self.op.iallocator:
7439 _CheckNodeOnline(self, primary_node)
7441 if instance.disk_template == constants.DT_DISKLESS:
7442 raise errors.OpPrereqError("Instance '%s' has no disks" %
7443 self.op.instance_name, errors.ECODE_INVAL)
7445 # Verify if node group locks are still correct
7446 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7448 # Node group locks are acquired only for the primary node (and only
7449 # when the allocator is used)
7450 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7453 # if we replace nodes *and* the old primary is offline, we don't
7454 # check the instance state
7455 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7456 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7457 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7458 msg="cannot recreate disks")
7461 self.disks = dict(self.op.disks)
7463 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7465 maxidx = max(self.disks.keys())
7466 if maxidx >= len(instance.disks):
7467 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7470 if ((self.op.nodes or self.op.iallocator) and
7471 sorted(self.disks.keys()) != range(len(instance.disks))):
7472 raise errors.OpPrereqError("Can't recreate disks partially and"
7473 " change the nodes at the same time",
7476 self.instance = instance
7478 if self.op.iallocator:
7479 self._RunAllocator()
7480 # Release unneeded node and node resource locks
7481 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7482 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7484 def Exec(self, feedback_fn):
7485 """Recreate the disks.
7488 instance = self.instance
7490 assert (self.owned_locks(locking.LEVEL_NODE) ==
7491 self.owned_locks(locking.LEVEL_NODE_RES))
7494 mods = [] # keeps track of needed changes
7496 for idx, disk in enumerate(instance.disks):
7498 changes = self.disks[idx]
7500 # Disk should not be recreated
7504 # update secondaries for disks, if needed
7505 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7506 # need to update the nodes and minors
7507 assert len(self.op.nodes) == 2
7508 assert len(disk.logical_id) == 6 # otherwise disk internals
7510 (_, _, old_port, _, _, old_secret) = disk.logical_id
7511 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7512 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7513 new_minors[0], new_minors[1], old_secret)
7514 assert len(disk.logical_id) == len(new_id)
7518 mods.append((idx, new_id, changes))
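# Illustrative note (not part of the original control flow): each entry
# appended to "mods" is a tuple (disk_index, new_logical_id_or_None, changes),
# for example (hypothetical values):
#   (0, ("node1.example.com", "node2.example.com", 11000, 1, 3, "secret"),
#    {constants.IDISK_SIZE: 2048})
# new_logical_id is only filled in for DRBD disks whose nodes change; in the
# apply loop below, only the size/mode values from "changes" are applied
# otherwise.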
7520 # now that we have passed all asserts above, we can apply the mods
7521 # in a single run (to avoid partial changes)
7522 for idx, new_id, changes in mods:
7523 disk = instance.disks[idx]
7524 if new_id is not None:
7525 assert disk.dev_type == constants.LD_DRBD8
7526 disk.logical_id = new_id
7528 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7529 mode=changes.get(constants.IDISK_MODE, None))
7531 # change primary node, if needed
7533 instance.primary_node = self.op.nodes[0]
7534 self.LogWarning("Changing the instance's nodes, you will have to"
7535 " remove any disks left on the older nodes manually")
7538 self.cfg.Update(instance, feedback_fn)
7540 # All touched nodes must be locked
7541 mylocks = self.owned_locks(locking.LEVEL_NODE)
7542 assert mylocks.issuperset(frozenset(instance.all_nodes))
7543 _CreateDisks(self, instance, to_skip=to_skip)
7546 class LUInstanceRename(LogicalUnit):
7547 """Rename an instance.
7550 HPATH = "instance-rename"
7551 HTYPE = constants.HTYPE_INSTANCE
7553 def CheckArguments(self):
7557 if self.op.ip_check and not self.op.name_check:
7558 # TODO: make the ip check more flexible and not depend on the name check
7559 raise errors.OpPrereqError("IP address check requires a name check",
7562 def BuildHooksEnv(self):
7565 This runs on master, primary and secondary nodes of the instance.
7568 env = _BuildInstanceHookEnvByObject(self, self.instance)
7569 env["INSTANCE_NEW_NAME"] = self.op.new_name
7572 def BuildHooksNodes(self):
7573 """Build hooks nodes.
7576 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7579 def CheckPrereq(self):
7580 """Check prerequisites.
7582 This checks that the instance is in the cluster and is not running.
7585 self.op.instance_name = _ExpandInstanceName(self.cfg,
7586 self.op.instance_name)
7587 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7588 assert instance is not None
7589 _CheckNodeOnline(self, instance.primary_node)
7590 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7591 msg="cannot rename")
7592 self.instance = instance
7594 new_name = self.op.new_name
7595 if self.op.name_check:
7596 hostname = _CheckHostnameSane(self, new_name)
7597 new_name = self.op.new_name = hostname.name
7598 if (self.op.ip_check and
7599 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7600 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7601 (hostname.ip, new_name),
7602 errors.ECODE_NOTUNIQUE)
7604 instance_list = self.cfg.GetInstanceList()
7605 if new_name in instance_list and new_name != instance.name:
7606 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7607 new_name, errors.ECODE_EXISTS)
7609 def Exec(self, feedback_fn):
7610 """Rename the instance.
7613 inst = self.instance
7614 old_name = inst.name
7616 rename_file_storage = False
7617 if (inst.disk_template in constants.DTS_FILEBASED and
7618 self.op.new_name != inst.name):
7619 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7620 rename_file_storage = True
7622 self.cfg.RenameInstance(inst.name, self.op.new_name)
7623 # Change the instance lock. This is definitely safe while we hold the BGL.
7624 # Otherwise the new lock would have to be added in acquired mode.
7626 assert self.glm.is_owned(locking.BGL)
7627 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7628 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7630 # re-read the instance from the configuration after rename
7631 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7633 if rename_file_storage:
7634 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7635 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7636 old_file_storage_dir,
7637 new_file_storage_dir)
7638 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7639 " (but the instance has been renamed in Ganeti)" %
7640 (inst.primary_node, old_file_storage_dir,
7641 new_file_storage_dir))
7643 _StartInstanceDisks(self, inst, None)
7644 # update info on disks
7645 info = _GetInstanceInfoText(inst)
7646 for (idx, disk) in enumerate(inst.disks):
7647 for node in inst.all_nodes:
7648 self.cfg.SetDiskID(disk, node)
7649 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7651 self.LogWarning("Error setting info on node %s for disk %s: %s",
7652 node, idx, result.fail_msg)
7654 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7655 old_name, self.op.debug_level)
7656 msg = result.fail_msg
7658 msg = ("Could not run OS rename script for instance %s on node %s"
7659 " (but the instance has been renamed in Ganeti): %s" %
7660 (inst.name, inst.primary_node, msg))
7661 self.LogWarning(msg)
7663 _ShutdownInstanceDisks(self, inst)
7668 class LUInstanceRemove(LogicalUnit):
7669 """Remove an instance.
7672 HPATH = "instance-remove"
7673 HTYPE = constants.HTYPE_INSTANCE
7676 def ExpandNames(self):
7677 self._ExpandAndLockInstance()
7678 self.needed_locks[locking.LEVEL_NODE] = []
7679 self.needed_locks[locking.LEVEL_NODE_RES] = []
7680 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7682 def DeclareLocks(self, level):
7683 if level == locking.LEVEL_NODE:
7684 self._LockInstancesNodes()
7685 elif level == locking.LEVEL_NODE_RES:
7687 self.needed_locks[locking.LEVEL_NODE_RES] = \
7688 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7690 def BuildHooksEnv(self):
7693 This runs on master, primary and secondary nodes of the instance.
7696 env = _BuildInstanceHookEnvByObject(self, self.instance)
7697 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7700 def BuildHooksNodes(self):
7701 """Build hooks nodes.
7704 nl = [self.cfg.GetMasterNode()]
7705 nl_post = list(self.instance.all_nodes) + nl
7706 return (nl, nl_post)
7708 def CheckPrereq(self):
7709 """Check prerequisites.
7711 This checks that the instance is in the cluster.
7714 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7715 assert self.instance is not None, \
7716 "Cannot retrieve locked instance %s" % self.op.instance_name
7718 def Exec(self, feedback_fn):
7719 """Remove the instance.
7722 instance = self.instance
7723 logging.info("Shutting down instance %s on node %s",
7724 instance.name, instance.primary_node)
7726 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7727 self.op.shutdown_timeout)
7728 msg = result.fail_msg
7730 if self.op.ignore_failures:
7731 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7733 raise errors.OpExecError("Could not shutdown instance %s on"
7735 (instance.name, instance.primary_node, msg))
7737 assert (self.owned_locks(locking.LEVEL_NODE) ==
7738 self.owned_locks(locking.LEVEL_NODE_RES))
7739 assert not (set(instance.all_nodes) -
7740 self.owned_locks(locking.LEVEL_NODE)), \
7741 "Not owning correct locks"
7743 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7746 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7747 """Utility function to remove an instance.
7750 logging.info("Removing block devices for instance %s", instance.name)
7752 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7753 if not ignore_failures:
7754 raise errors.OpExecError("Can't remove instance's disks")
7755 feedback_fn("Warning: can't remove instance's disks")
7757 logging.info("Removing instance %s out of cluster config", instance.name)
7759 lu.cfg.RemoveInstance(instance.name)
7761 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7762 "Instance lock removal conflict"
7764 # Remove lock for the instance
7765 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7768 class LUInstanceQuery(NoHooksLU):
7769 """Logical unit for querying instances.
7772 # pylint: disable=W0142
7775 def CheckArguments(self):
7776 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7777 self.op.output_fields, self.op.use_locking)
7779 def ExpandNames(self):
7780 self.iq.ExpandNames(self)
7782 def DeclareLocks(self, level):
7783 self.iq.DeclareLocks(self, level)
7785 def Exec(self, feedback_fn):
7786 return self.iq.OldStyleQuery(self)
7789 def _ExpandNamesForMigration(lu):
7790 """Expands names for use with L{TLMigrateInstance}.
7792 @type lu: L{LogicalUnit}
7795 if lu.op.target_node is not None:
7796 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7798 lu.needed_locks[locking.LEVEL_NODE] = []
7799 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7801 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7802 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7805 def _DeclareLocksForMigration(lu, level):
7806 """Declares locks for L{TLMigrateInstance}.
7808 @type lu: L{LogicalUnit}
7809 @param level: Lock level
7812 if level == locking.LEVEL_NODE:
7813 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7814 if instance.disk_template in constants.DTS_EXT_MIRROR:
7815 if lu.op.target_node is None:
7816 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7818 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7820 del lu.recalculate_locks[locking.LEVEL_NODE]
7822 lu._LockInstancesNodes() # pylint: disable=W0212
7823 elif level == locking.LEVEL_NODE_RES:
7825 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7826 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
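# Usage sketch (illustration only; LUInstanceFailover and LUInstanceMigrate
# below do exactly this): an LU that delegates its work to TLMigrateInstance
# pairs the two helpers above with its own name expansion:
#
#   class LUExampleMigrateLike(LogicalUnit):   # hypothetical LU name
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       _ExpandNamesForMigration(self)
#
#     def DeclareLocks(self, level):
#       _DeclareLocksForMigration(self, level)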
7829 class LUInstanceFailover(LogicalUnit):
7830 """Failover an instance.
7833 HPATH = "instance-failover"
7834 HTYPE = constants.HTYPE_INSTANCE
7837 def CheckArguments(self):
7838 """Check the arguments.
7841 self.iallocator = getattr(self.op, "iallocator", None)
7842 self.target_node = getattr(self.op, "target_node", None)
7844 def ExpandNames(self):
7845 self._ExpandAndLockInstance()
7846 _ExpandNamesForMigration(self)
7849 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7850 self.op.ignore_consistency, True,
7851 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7853 self.tasklets = [self._migrater]
7855 def DeclareLocks(self, level):
7856 _DeclareLocksForMigration(self, level)
7858 def BuildHooksEnv(self):
7861 This runs on master, primary and secondary nodes of the instance.
7864 instance = self._migrater.instance
7865 source_node = instance.primary_node
7866 target_node = self.op.target_node
7868 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7869 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7870 "OLD_PRIMARY": source_node,
7871 "NEW_PRIMARY": target_node,
7874 if instance.disk_template in constants.DTS_INT_MIRROR:
7875 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7876 env["NEW_SECONDARY"] = source_node
7878 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7880 env.update(_BuildInstanceHookEnvByObject(self, instance))
7884 def BuildHooksNodes(self):
7885 """Build hooks nodes.
7888 instance = self._migrater.instance
7889 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7890 return (nl, nl + [instance.primary_node])
7893 class LUInstanceMigrate(LogicalUnit):
7894 """Migrate an instance.
7896 This is migration without shutting down, compared to the failover,
7897 which is done with shutdown.
7900 HPATH = "instance-migrate"
7901 HTYPE = constants.HTYPE_INSTANCE
7904 def ExpandNames(self):
7905 self._ExpandAndLockInstance()
7906 _ExpandNamesForMigration(self)
7909 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7910 False, self.op.allow_failover, False,
7911 self.op.allow_runtime_changes,
7912 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7913 self.op.ignore_ipolicy)
7915 self.tasklets = [self._migrater]
7917 def DeclareLocks(self, level):
7918 _DeclareLocksForMigration(self, level)
7920 def BuildHooksEnv(self):
7923 This runs on master, primary and secondary nodes of the instance.
7926 instance = self._migrater.instance
7927 source_node = instance.primary_node
7928 target_node = self.op.target_node
7929 env = _BuildInstanceHookEnvByObject(self, instance)
7931 "MIGRATE_LIVE": self._migrater.live,
7932 "MIGRATE_CLEANUP": self.op.cleanup,
7933 "OLD_PRIMARY": source_node,
7934 "NEW_PRIMARY": target_node,
7935 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7938 if instance.disk_template in constants.DTS_INT_MIRROR:
7939 env["OLD_SECONDARY"] = target_node
7940 env["NEW_SECONDARY"] = source_node
7942 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7946 def BuildHooksNodes(self):
7947 """Build hooks nodes.
7950 instance = self._migrater.instance
7951 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7952 return (nl, nl + [instance.primary_node])
7955 class LUInstanceMove(LogicalUnit):
7956 """Move an instance by data-copying.
7959 HPATH = "instance-move"
7960 HTYPE = constants.HTYPE_INSTANCE
7963 def ExpandNames(self):
7964 self._ExpandAndLockInstance()
7965 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7966 self.op.target_node = target_node
7967 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7968 self.needed_locks[locking.LEVEL_NODE_RES] = []
7969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7971 def DeclareLocks(self, level):
7972 if level == locking.LEVEL_NODE:
7973 self._LockInstancesNodes(primary_only=True)
7974 elif level == locking.LEVEL_NODE_RES:
7976 self.needed_locks[locking.LEVEL_NODE_RES] = \
7977 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7979 def BuildHooksEnv(self):
7982 This runs on master, primary and secondary nodes of the instance.
7986 "TARGET_NODE": self.op.target_node,
7987 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7989 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7992 def BuildHooksNodes(self):
7993 """Build hooks nodes.
7997 self.cfg.GetMasterNode(),
7998 self.instance.primary_node,
7999 self.op.target_node,
8003 def CheckPrereq(self):
8004 """Check prerequisites.
8006 This checks that the instance is in the cluster.
8009 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8010 assert self.instance is not None, \
8011 "Cannot retrieve locked instance %s" % self.op.instance_name
8013 node = self.cfg.GetNodeInfo(self.op.target_node)
8014 assert node is not None, \
8015 "Cannot retrieve locked node %s" % self.op.target_node
8017 self.target_node = target_node = node.name
8019 if target_node == instance.primary_node:
8020 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8021 (instance.name, target_node),
8024 bep = self.cfg.GetClusterInfo().FillBE(instance)
8026 for idx, dsk in enumerate(instance.disks):
8027 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8028 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8029 " cannot copy" % idx, errors.ECODE_STATE)
8031 _CheckNodeOnline(self, target_node)
8032 _CheckNodeNotDrained(self, target_node)
8033 _CheckNodeVmCapable(self, target_node)
8034 cluster = self.cfg.GetClusterInfo()
8035 group_info = self.cfg.GetNodeGroup(node.group)
8036 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8037 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8038 ignore=self.op.ignore_ipolicy)
8040 if instance.admin_state == constants.ADMINST_UP:
8041 # check memory requirements on the target node
8042 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8043 instance.name, bep[constants.BE_MAXMEM],
8044 instance.hypervisor)
8046 self.LogInfo("Not checking memory on the secondary node as"
8047 " instance will not be started")
8049 # check bridge existence
8050 _CheckInstanceBridgesExist(self, instance, node=target_node)
8052 def Exec(self, feedback_fn):
8053 """Move an instance.
8055 The move is done by shutting it down on its present node, copying
8056 the data over (slow) and starting it on the new node.
8059 instance = self.instance
8061 source_node = instance.primary_node
8062 target_node = self.target_node
8064 self.LogInfo("Shutting down instance %s on source node %s",
8065 instance.name, source_node)
8067 assert (self.owned_locks(locking.LEVEL_NODE) ==
8068 self.owned_locks(locking.LEVEL_NODE_RES))
8070 result = self.rpc.call_instance_shutdown(source_node, instance,
8071 self.op.shutdown_timeout)
8072 msg = result.fail_msg
8074 if self.op.ignore_consistency:
8075 self.LogWarning("Could not shutdown instance %s on node %s."
8076 " Proceeding anyway. Please make sure node"
8077 " %s is down. Error details: %s",
8078 instance.name, source_node, source_node, msg)
8080 raise errors.OpExecError("Could not shutdown instance %s on"
8082 (instance.name, source_node, msg))
8084 # create the target disks
8086 _CreateDisks(self, instance, target_node=target_node)
8087 except errors.OpExecError:
8088 self.LogWarning("Device creation failed, reverting...")
8090 _RemoveDisks(self, instance, target_node=target_node)
8092 self.cfg.ReleaseDRBDMinors(instance.name)
8095 cluster_name = self.cfg.GetClusterInfo().cluster_name
8098 # activate, get path, copy the data over
8099 for idx, disk in enumerate(instance.disks):
8100 self.LogInfo("Copying data for disk %d", idx)
8101 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8102 instance.name, True, idx)
8104 self.LogWarning("Can't assemble newly created disk %d: %s",
8105 idx, result.fail_msg)
8106 errs.append(result.fail_msg)
8108 dev_path = result.payload
8109 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8110 target_node, dev_path,
8113 self.LogWarning("Can't copy data over for disk %d: %s",
8114 idx, result.fail_msg)
8115 errs.append(result.fail_msg)
8119 self.LogWarning("Some disks failed to copy, aborting")
8121 _RemoveDisks(self, instance, target_node=target_node)
8123 self.cfg.ReleaseDRBDMinors(instance.name)
8124 raise errors.OpExecError("Errors during disk copy: %s" %
8127 instance.primary_node = target_node
8128 self.cfg.Update(instance, feedback_fn)
8130 self.LogInfo("Removing the disks on the original node")
8131 _RemoveDisks(self, instance, target_node=source_node)
8133 # Only start the instance if it's marked as up
8134 if instance.admin_state == constants.ADMINST_UP:
8135 self.LogInfo("Starting instance %s on node %s",
8136 instance.name, target_node)
8138 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8139 ignore_secondaries=True)
8141 _ShutdownInstanceDisks(self, instance)
8142 raise errors.OpExecError("Can't activate the instance's disks")
8144 result = self.rpc.call_instance_start(target_node,
8145 (instance, None, None), False)
8146 msg = result.fail_msg
8148 _ShutdownInstanceDisks(self, instance)
8149 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8150 (instance.name, target_node, msg))
8153 class LUNodeMigrate(LogicalUnit):
8154 """Migrate all instances from a node.
8157 HPATH = "node-migrate"
8158 HTYPE = constants.HTYPE_NODE
8161 def CheckArguments(self):
8164 def ExpandNames(self):
8165 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8167 self.share_locks = _ShareAll()
8168 self.needed_locks = {
8169 locking.LEVEL_NODE: [self.op.node_name],
8172 def BuildHooksEnv(self):
8175 This runs on the master, the primary and all the secondaries.
8179 "NODE_NAME": self.op.node_name,
8180 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8183 def BuildHooksNodes(self):
8184 """Build hooks nodes.
8187 nl = [self.cfg.GetMasterNode()]
8190 def CheckPrereq(self):
8193 def Exec(self, feedback_fn):
8194 # Prepare jobs for migrating instances
8195 allow_runtime_changes = self.op.allow_runtime_changes
8197 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8200 iallocator=self.op.iallocator,
8201 target_node=self.op.target_node,
8202 allow_runtime_changes=allow_runtime_changes,
8203 ignore_ipolicy=self.op.ignore_ipolicy)]
8204 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8206 # TODO: Run iallocator in this opcode and pass correct placement options to
8207 # OpInstanceMigrate. Since other jobs can modify the cluster between
8208 # running the iallocator and the actual migration, a good consistency model
8209 # will have to be found.
8211 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8212 frozenset([self.op.node_name]))
8214 return ResultWithJobs(jobs)
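# Note (illustration, not part of the original code): "jobs" is a list of
# single-opcode job definitions, one per primary instance on the node, e.g.
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#           [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
# so each instance migration can be scheduled as its own job.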
8217 class TLMigrateInstance(Tasklet):
8218 """Tasklet class for instance migration.
8221 @ivar live: whether the migration will be done live or non-live;
8222 this variable is initialized only after CheckPrereq has run
8223 @type cleanup: boolean
8224 @ivar cleanup: Whether we clean up from a failed migration
8225 @type iallocator: string
8226 @ivar iallocator: The iallocator used to determine target_node
8227 @type target_node: string
8228 @ivar target_node: If given, the target_node to reallocate the instance to
8229 @type failover: boolean
8230 @ivar failover: Whether operation results in failover or migration
8231 @type fallback: boolean
8232 @ivar fallback: Whether fallback to failover is allowed if migration not
8234 @type ignore_consistency: boolean
8235 @ivar ignore_consistency: Whether we should ignore consistency between source
8237 @type shutdown_timeout: int
8238 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
8239 @type ignore_ipolicy: bool
8240 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8245 _MIGRATION_POLL_INTERVAL = 1 # seconds
8246 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8248 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8249 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8251 """Initializes this class.
8254 Tasklet.__init__(self, lu)
8257 self.instance_name = instance_name
8258 self.cleanup = cleanup
8259 self.live = False # will be overridden later
8260 self.failover = failover
8261 self.fallback = fallback
8262 self.ignore_consistency = ignore_consistency
8263 self.shutdown_timeout = shutdown_timeout
8264 self.ignore_ipolicy = ignore_ipolicy
8265 self.allow_runtime_changes = allow_runtime_changes
8267 def CheckPrereq(self):
8268 """Check prerequisites.
8270 This checks that the instance is in the cluster.
8273 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8274 instance = self.cfg.GetInstanceInfo(instance_name)
8275 assert instance is not None
8276 self.instance = instance
8277 cluster = self.cfg.GetClusterInfo()
8279 if (not self.cleanup and
8280 not instance.admin_state == constants.ADMINST_UP and
8281 not self.failover and self.fallback):
8282 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8283 " switching to failover")
8284 self.failover = True
8286 if instance.disk_template not in constants.DTS_MIRRORED:
8291 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8292 " %s" % (instance.disk_template, text),
8295 if instance.disk_template in constants.DTS_EXT_MIRROR:
8296 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8298 if self.lu.op.iallocator:
8299 self._RunAllocator()
8301 # We set self.target_node as it is required by
8303 self.target_node = self.lu.op.target_node
8305 # Check that the target node is correct in terms of instance policy
8306 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8307 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8308 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8310 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8311 ignore=self.ignore_ipolicy)
8313 # self.target_node is already populated, either directly or by the
8315 target_node = self.target_node
8316 if self.target_node == instance.primary_node:
8317 raise errors.OpPrereqError("Cannot migrate instance %s"
8318 " to its primary (%s)" %
8319 (instance.name, instance.primary_node),
8322 if len(self.lu.tasklets) == 1:
8323 # It is safe to release locks only when we're the only tasklet
8325 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8326 keep=[instance.primary_node, self.target_node])
8329 secondary_nodes = instance.secondary_nodes
8330 if not secondary_nodes:
8331 raise errors.ConfigurationError("No secondary node but using"
8332 " %s disk template" %
8333 instance.disk_template)
8334 target_node = secondary_nodes[0]
8335 if self.lu.op.iallocator or (self.lu.op.target_node and
8336 self.lu.op.target_node != target_node):
8338 text = "failed over"
8341 raise errors.OpPrereqError("Instances with disk template %s cannot"
8342 " be %s to arbitrary nodes"
8343 " (neither an iallocator nor a target"
8344 " node can be passed)" %
8345 (instance.disk_template, text),
8347 nodeinfo = self.cfg.GetNodeInfo(target_node)
8348 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8349 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8351 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8352 ignore=self.ignore_ipolicy)
8354 i_be = cluster.FillBE(instance)
8356 # check memory requirements on the secondary node
8357 if (not self.cleanup and
8358 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8359 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8360 "migrating instance %s" %
8362 i_be[constants.BE_MINMEM],
8363 instance.hypervisor)
8365 self.lu.LogInfo("Not checking memory on the secondary node as"
8366 " instance will not be started")
8368 # check if failover must be forced instead of migration
8369 if (not self.cleanup and not self.failover and
8370 i_be[constants.BE_ALWAYS_FAILOVER]):
8371 self.lu.LogInfo("Instance configured to always failover; fallback"
8373 self.failover = True
8375 # check bridge existence
8376 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8378 if not self.cleanup:
8379 _CheckNodeNotDrained(self.lu, target_node)
8380 if not self.failover:
8381 result = self.rpc.call_instance_migratable(instance.primary_node,
8383 if result.fail_msg and self.fallback:
8384 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8386 self.failover = True
8388 result.Raise("Can't migrate, please use failover",
8389 prereq=True, ecode=errors.ECODE_STATE)
8391 assert not (self.failover and self.cleanup)
8393 if not self.failover:
8394 if self.lu.op.live is not None and self.lu.op.mode is not None:
8395 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8396 " parameters are accepted",
8398 if self.lu.op.live is not None:
8400 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8402 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8403 # reset the 'live' parameter to None so that repeated
8404 # invocations of CheckPrereq do not raise an exception
8405 self.lu.op.live = None
8406 elif self.lu.op.mode is None:
8407 # read the default value from the hypervisor
8408 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8409 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8411 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8413 # Failover is never live
8416 if not (self.failover or self.cleanup):
8417 remote_info = self.rpc.call_instance_info(instance.primary_node,
8419 instance.hypervisor)
8420 remote_info.Raise("Error checking instance on node %s" %
8421 instance.primary_node)
8422 instance_running = bool(remote_info.payload)
8423 if instance_running:
8424 self.current_mem = int(remote_info.payload["memory"])
8426 def _RunAllocator(self):
8427 """Run the allocator based on input opcode.
8430 # FIXME: add a self.ignore_ipolicy option
8431 req = iallocator.IAReqRelocate(name=self.instance_name,
8432 relocate_from=[self.instance.primary_node])
8433 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8435 ial.Run(self.lu.op.iallocator)
8438 raise errors.OpPrereqError("Can't compute nodes using"
8439 " iallocator '%s': %s" %
8440 (self.lu.op.iallocator, ial.info),
8442 self.target_node = ial.result[0]
8443 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8444 self.instance_name, self.lu.op.iallocator,
8445 utils.CommaJoin(ial.result))
8447 def _WaitUntilSync(self):
8448 """Poll with custom rpc for disk sync.
8450 This uses our own step-based rpc call.
8453 self.feedback_fn("* wait until resync is done")
8457 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8459 (self.instance.disks,
8462 for node, nres in result.items():
8463 nres.Raise("Cannot resync disks on node %s" % node)
8464 node_done, node_percent = nres.payload
8465 all_done = all_done and node_done
8466 if node_percent is not None:
8467 min_percent = min(min_percent, node_percent)
8469 if min_percent < 100:
8470 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8473 def _EnsureSecondary(self, node):
8474 """Demote a node to secondary.
8477 self.feedback_fn("* switching node %s to secondary mode" % node)
8479 for dev in self.instance.disks:
8480 self.cfg.SetDiskID(dev, node)
8482 result = self.rpc.call_blockdev_close(node, self.instance.name,
8483 self.instance.disks)
8484 result.Raise("Cannot change disk to secondary on node %s" % node)
8486 def _GoStandalone(self):
8487 """Disconnect from the network.
8490 self.feedback_fn("* changing into standalone mode")
8491 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8492 self.instance.disks)
8493 for node, nres in result.items():
8494 nres.Raise("Cannot disconnect disks node %s" % node)
8496 def _GoReconnect(self, multimaster):
8497 """Reconnect to the network.
8503 msg = "single-master"
8504 self.feedback_fn("* changing disks into %s mode" % msg)
8505 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8506 (self.instance.disks, self.instance),
8507 self.instance.name, multimaster)
8508 for node, nres in result.items():
8509 nres.Raise("Cannot change disks config on node %s" % node)
8511 def _ExecCleanup(self):
8512 """Try to cleanup after a failed migration.
8514 The cleanup is done by:
8515 - check that the instance is running only on one node
8516 (and update the config if needed)
8517 - change disks on its secondary node to secondary
8518 - wait until disks are fully synchronized
8519 - disconnect from the network
8520 - change disks into single-master mode
8521 - wait again until disks are fully synchronized
8524 instance = self.instance
8525 target_node = self.target_node
8526 source_node = self.source_node
8528 # check running on only one node
8529 self.feedback_fn("* checking where the instance actually runs"
8530 " (if this hangs, the hypervisor might be in"
8532 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8533 for node, result in ins_l.items():
8534 result.Raise("Can't contact node %s" % node)
8536 runningon_source = instance.name in ins_l[source_node].payload
8537 runningon_target = instance.name in ins_l[target_node].payload
8539 if runningon_source and runningon_target:
8540 raise errors.OpExecError("Instance seems to be running on two nodes,"
8541 " or the hypervisor is confused; you will have"
8542 " to ensure manually that it runs only on one"
8543 " and restart this operation")
8545 if not (runningon_source or runningon_target):
8546 raise errors.OpExecError("Instance does not seem to be running at all;"
8547 " in this case it's safer to repair by"
8548 " running 'gnt-instance stop' to ensure disk"
8549 " shutdown, and then restarting it")
8551 if runningon_target:
8552 # the migration has actually succeeded, we need to update the config
8553 self.feedback_fn("* instance running on secondary node (%s),"
8554 " updating config" % target_node)
8555 instance.primary_node = target_node
8556 self.cfg.Update(instance, self.feedback_fn)
8557 demoted_node = source_node
8559 self.feedback_fn("* instance confirmed to be running on its"
8560 " primary node (%s)" % source_node)
8561 demoted_node = target_node
8563 if instance.disk_template in constants.DTS_INT_MIRROR:
8564 self._EnsureSecondary(demoted_node)
8566 self._WaitUntilSync()
8567 except errors.OpExecError:
8568 # we ignore errors here, since if the device is standalone, it
8569 # won't be able to sync
8571 self._GoStandalone()
8572 self._GoReconnect(False)
8573 self._WaitUntilSync()
8575 self.feedback_fn("* done")
8577 def _RevertDiskStatus(self):
8578 """Try to revert the disk status after a failed migration.
8581 target_node = self.target_node
8582 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8586 self._EnsureSecondary(target_node)
8587 self._GoStandalone()
8588 self._GoReconnect(False)
8589 self._WaitUntilSync()
8590 except errors.OpExecError, err:
8591 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8592 " please try to recover the instance manually;"
8593 " error '%s'" % str(err))
8595 def _AbortMigration(self):
8596 """Call the hypervisor code to abort a started migration.
8599 instance = self.instance
8600 target_node = self.target_node
8601 source_node = self.source_node
8602 migration_info = self.migration_info
8604 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8608 abort_msg = abort_result.fail_msg
8610 logging.error("Aborting migration failed on target node %s: %s",
8611 target_node, abort_msg)
8612 # Don't raise an exception here, as we still have to try to revert the
8613 # disk status, even if this step failed.
8615 abort_result = self.rpc.call_instance_finalize_migration_src(
8616 source_node, instance, False, self.live)
8617 abort_msg = abort_result.fail_msg
8619 logging.error("Aborting migration failed on source node %s: %s",
8620 source_node, abort_msg)
8622 def _ExecMigration(self):
8623 """Migrate an instance.
8625 The migrate is done by:
8626 - change the disks into dual-master mode
8627 - wait until disks are fully synchronized again
8628 - migrate the instance
8629 - change disks on the new secondary node (the old primary) to secondary
8630 - wait until disks are fully synchronized
8631 - change disks into single-master mode
8634 instance = self.instance
8635 target_node = self.target_node
8636 source_node = self.source_node
8638 # Check for hypervisor version mismatch and warn the user.
8639 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8640 None, [self.instance.hypervisor])
8641 for ninfo in nodeinfo.values():
8642 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8644 (_, _, (src_info, )) = nodeinfo[source_node].payload
8645 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8647 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8648 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8649 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8650 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8651 if src_version != dst_version:
8652 self.feedback_fn("* warning: hypervisor version mismatch between"
8653 " source (%s) and target (%s) node" %
8654 (src_version, dst_version))
8656 self.feedback_fn("* checking disk consistency between source and target")
8657 for (idx, dev) in enumerate(instance.disks):
8658 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8659 raise errors.OpExecError("Disk %s is degraded or not fully"
8660 " synchronized on target node,"
8661 " aborting migration" % idx)
8663 if self.current_mem > self.tgt_free_mem:
8664 if not self.allow_runtime_changes:
8665 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8666 " free memory to fit instance %s on target"
8667 " node %s (have %dMB, need %dMB)" %
8668 (instance.name, target_node,
8669 self.tgt_free_mem, self.current_mem))
8670 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8671 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8674 rpcres.Raise("Cannot modify instance runtime memory")
8676 # First get the migration information from the remote node
8677 result = self.rpc.call_migration_info(source_node, instance)
8678 msg = result.fail_msg
8680 log_err = ("Failed fetching source migration information from %s: %s" %
8682 logging.error(log_err)
8683 raise errors.OpExecError(log_err)
8685 self.migration_info = migration_info = result.payload
8687 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8688 # Then switch the disks to master/master mode
8689 self._EnsureSecondary(target_node)
8690 self._GoStandalone()
8691 self._GoReconnect(True)
8692 self._WaitUntilSync()
8694 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8695 result = self.rpc.call_accept_instance(target_node,
8698 self.nodes_ip[target_node])
8700 msg = result.fail_msg
8702 logging.error("Instance pre-migration failed, trying to revert"
8703 " disk status: %s", msg)
8704 self.feedback_fn("Pre-migration failed, aborting")
8705 self._AbortMigration()
8706 self._RevertDiskStatus()
8707 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8708 (instance.name, msg))
8710 self.feedback_fn("* migrating instance to %s" % target_node)
8711 result = self.rpc.call_instance_migrate(source_node, instance,
8712 self.nodes_ip[target_node],
8714 msg = result.fail_msg
8716 logging.error("Instance migration failed, trying to revert"
8717 " disk status: %s", msg)
8718 self.feedback_fn("Migration failed, aborting")
8719 self._AbortMigration()
8720 self._RevertDiskStatus()
8721 raise errors.OpExecError("Could not migrate instance %s: %s" %
8722 (instance.name, msg))
8724 self.feedback_fn("* starting memory transfer")
8725 last_feedback = time.time()
8727 result = self.rpc.call_instance_get_migration_status(source_node,
8729 msg = result.fail_msg
8730 ms = result.payload # MigrationStatus instance
8731 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8732 logging.error("Instance migration failed, trying to revert"
8733 " disk status: %s", msg)
8734 self.feedback_fn("Migration failed, aborting")
8735 self._AbortMigration()
8736 self._RevertDiskStatus()
8738 msg = "hypervisor returned failure"
8739 raise errors.OpExecError("Could not migrate instance %s: %s" %
8740 (instance.name, msg))
8742 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8743 self.feedback_fn("* memory transfer complete")
8746 if (utils.TimeoutExpired(last_feedback,
8747 self._MIGRATION_FEEDBACK_INTERVAL) and
8748 ms.transferred_ram is not None):
8749 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8750 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8751 last_feedback = time.time()
8753 time.sleep(self._MIGRATION_POLL_INTERVAL)
8755 result = self.rpc.call_instance_finalize_migration_src(source_node,
8759 msg = result.fail_msg
8761 logging.error("Instance migration succeeded, but finalization failed"
8762 " on the source node: %s", msg)
8763 raise errors.OpExecError("Could not finalize instance migration: %s" %
8766 instance.primary_node = target_node
8768 # distribute new instance config to the other nodes
8769 self.cfg.Update(instance, self.feedback_fn)
8771 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8775 msg = result.fail_msg
8777 logging.error("Instance migration succeeded, but finalization failed"
8778 " on the target node: %s", msg)
8779 raise errors.OpExecError("Could not finalize instance migration: %s" %
8782 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8783 self._EnsureSecondary(source_node)
8784 self._WaitUntilSync()
8785 self._GoStandalone()
8786 self._GoReconnect(False)
8787 self._WaitUntilSync()
8789 # If the instance's disk template is `rbd' and there was a successful
8790 # migration, unmap the device from the source node.
8791 if self.instance.disk_template == constants.DT_RBD:
8792 disks = _ExpandCheckDisks(instance, instance.disks)
8793 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8795 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8796 msg = result.fail_msg
8798 logging.error("Migration was successful, but couldn't unmap the"
8799 " block device %s on source node %s: %s",
8800 disk.iv_name, source_node, msg)
8801 logging.error("You need to unmap the device %s manually on %s",
8802 disk.iv_name, source_node)
8804 self.feedback_fn("* done")
8806 def _ExecFailover(self):
8807 """Failover an instance.
8809 The failover is done by shutting it down on its present node and
8810 starting it on the secondary.
8813 instance = self.instance
8814 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8816 source_node = instance.primary_node
8817 target_node = self.target_node
8819 if instance.admin_state == constants.ADMINST_UP:
8820 self.feedback_fn("* checking disk consistency between source and target")
8821 for (idx, dev) in enumerate(instance.disks):
8822 # for drbd, these are drbd over lvm
8823 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8825 if primary_node.offline:
8826 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8828 (primary_node.name, idx, target_node))
8829 elif not self.ignore_consistency:
8830 raise errors.OpExecError("Disk %s is degraded on target node,"
8831 " aborting failover" % idx)
8833 self.feedback_fn("* not checking disk consistency as instance is not"
8836 self.feedback_fn("* shutting down instance on source node")
8837 logging.info("Shutting down instance %s on node %s",
8838 instance.name, source_node)
8840 result = self.rpc.call_instance_shutdown(source_node, instance,
8841 self.shutdown_timeout)
8842 msg = result.fail_msg
8844 if self.ignore_consistency or primary_node.offline:
8845 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8846 " proceeding anyway; please make sure node"
8847 " %s is down; error details: %s",
8848 instance.name, source_node, source_node, msg)
8850 raise errors.OpExecError("Could not shutdown instance %s on"
8852 (instance.name, source_node, msg))
8854 self.feedback_fn("* deactivating the instance's disks on source node")
8855 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8856 raise errors.OpExecError("Can't shut down the instance's disks")
8858 instance.primary_node = target_node
8859 # distribute new instance config to the other nodes
8860 self.cfg.Update(instance, self.feedback_fn)
8862 # Only start the instance if it's marked as up
8863 if instance.admin_state == constants.ADMINST_UP:
8864 self.feedback_fn("* activating the instance's disks on target node %s" %
8866 logging.info("Starting instance %s on node %s",
8867 instance.name, target_node)
8869 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8870 ignore_secondaries=True)
8872 _ShutdownInstanceDisks(self.lu, instance)
8873 raise errors.OpExecError("Can't activate the instance's disks")
8875 self.feedback_fn("* starting the instance on the target node %s" %
8877 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8879 msg = result.fail_msg
8881 _ShutdownInstanceDisks(self.lu, instance)
8882 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8883 (instance.name, target_node, msg))
8885 def Exec(self, feedback_fn):
8886 """Perform the migration.
8889 self.feedback_fn = feedback_fn
8890 self.source_node = self.instance.primary_node
8892 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8893 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8894 self.target_node = self.instance.secondary_nodes[0]
8895 # Otherwise self.target_node has been populated either
8896 # directly, or through an iallocator.
8898 self.all_nodes = [self.source_node, self.target_node]
8899 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8900 in self.cfg.GetMultiNodeInfo(self.all_nodes))
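# Note (illustrative, made-up names/addresses): at this point self.all_nodes is
# [source_node, target_node] and self.nodes_ip maps each of them to its
# secondary IP, e.g.
#   {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"}
# The DRBD helpers above (_GoStandalone, _GoReconnect, _WaitUntilSync) operate
# on exactly these two nodes.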
8903 feedback_fn("Failover instance %s" % self.instance.name)
8904 self._ExecFailover()
8906 feedback_fn("Migrating instance %s" % self.instance.name)
8909 return self._ExecCleanup()
8911 return self._ExecMigration()
8914 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8916 """Wrapper around L{_CreateBlockDevInner}.
8918 This method annotates the root device first.
8921 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8922 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8926 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8928 """Create a tree of block devices on a given node.
8930 If this device type has to be created on secondaries, create it and
8933 If not, just recurse to children keeping the same 'force' value.
8935 @attention: The device has to be annotated already.
8937 @param lu: the lu on whose behalf we execute
8938 @param node: the node on which to create the device
8939 @type instance: L{objects.Instance}
8940 @param instance: the instance which owns the device
8941 @type device: L{objects.Disk}
8942 @param device: the device to create
8943 @type force_create: boolean
8944 @param force_create: whether to force creation of this device; this
8945 will be changed to True whenever we find a device which has the
8946 CreateOnSecondary() attribute
8947 @param info: the extra 'metadata' we should attach to the device
8948 (this will be represented as a LVM tag)
8949 @type force_open: boolean
8950 @param force_open: this parameter will be passed to the
8951 L{backend.BlockdevCreate} function where it specifies
8952 whether we run on primary or not, and it affects both
8953 the child assembly and the device's own Open() execution
8956 if device.CreateOnSecondary():
8960 for child in device.children:
8961 _CreateBlockDevInner(lu, node, instance, child, force_create,
8964 if not force_create:
8967 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8970 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8971 """Create a single block device on a given node.
8973 This will not recurse over children of the device, so they must be
8976 @param lu: the lu on whose behalf we execute
8977 @param node: the node on which to create the device
8978 @type instance: L{objects.Instance}
8979 @param instance: the instance which owns the device
8980 @type device: L{objects.Disk}
8981 @param device: the device to create
8982 @param info: the extra 'metadata' we should attach to the device
8983 (this will be represented as a LVM tag)
8984 @type force_open: boolean
8985 @param force_open: this parameter will be passed to the
8986 L{backend.BlockdevCreate} function where it specifies
8987 whether we run on primary or not, and it affects both
8988 the child assembly and the device's own Open() execution
8991 lu.cfg.SetDiskID(device, node)
8992 result = lu.rpc.call_blockdev_create(node, device, device.size,
8993 instance.name, force_open, info)
8994 result.Raise("Can't create block device %s on"
8995 " node %s for instance %s" % (device, node, instance.name))
8996 if device.physical_id is None:
8997 device.physical_id = result.payload
9000 def _GenerateUniqueNames(lu, exts):
9001 """Generate a suitable LV name.
9003 This will generate a logical volume name for the given instance.
9008 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9009 results.append("%s%s" % (new_id, val))
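# Example (illustrative): _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns
# something like ["<unique-id>.disk0", "<unique-id>.disk1"], i.e. one
# cluster-wide unique name per requested suffix; _GenerateDiskTemplate below
# appends "_data"/"_meta" to such names for the two DRBD child volumes.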
9013 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9014 iv_name, p_minor, s_minor):
9015 """Generate a drbd8 device complete with its children.
9018 assert len(vgnames) == len(names) == 2
9019 port = lu.cfg.AllocatePort()
9020 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9022 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9023 logical_id=(vgnames[0], names[0]),
9025 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9026 size=constants.DRBD_META_SIZE,
9027 logical_id=(vgnames[1], names[1]),
9029 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9030 logical_id=(primary, secondary, port,
9033 children=[dev_data, dev_meta],
9034 iv_name=iv_name, params={})
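# Illustrative summary of the structure built above: the resulting DRBD8 disk
# has logical_id (primary, secondary, port, p_minor, s_minor, shared_secret)
# and two LD_LV children, the data volume of the requested size and the
# metadata volume of constants.DRBD_META_SIZE, using the "<name>_data" and
# "<name>_meta" names passed in by the caller.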
9038 _DISK_TEMPLATE_NAME_PREFIX = {
9039 constants.DT_PLAIN: "",
9040 constants.DT_RBD: ".rbd",
9044 _DISK_TEMPLATE_DEVICE_TYPE = {
9045 constants.DT_PLAIN: constants.LD_LV,
9046 constants.DT_FILE: constants.LD_FILE,
9047 constants.DT_SHARED_FILE: constants.LD_FILE,
9048 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9049 constants.DT_RBD: constants.LD_RBD,
9053 def _GenerateDiskTemplate(
9054 lu, template_name, instance_name, primary_node, secondary_nodes,
9055 disk_info, file_storage_dir, file_driver, base_index,
9056 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9057 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9058 """Generate the entire disk layout for a given template type.
9061 #TODO: compute space requirements
9063 vgname = lu.cfg.GetVGName()
9064 disk_count = len(disk_info)
9067 if template_name == constants.DT_DISKLESS:
9069 elif template_name == constants.DT_DRBD8:
9070 if len(secondary_nodes) != 1:
9071 raise errors.ProgrammerError("Wrong template configuration")
9072 remote_node = secondary_nodes[0]
9073 minors = lu.cfg.AllocateDRBDMinor(
9074 [primary_node, remote_node] * len(disk_info), instance_name)
9076 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9078 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9081 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9082 for i in range(disk_count)]):
9083 names.append(lv_prefix + "_data")
9084 names.append(lv_prefix + "_meta")
9085 for idx, disk in enumerate(disk_info):
9086 disk_index = idx + base_index
9087 data_vg = disk.get(constants.IDISK_VG, vgname)
9088 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9089 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9090 disk[constants.IDISK_SIZE],
9092 names[idx * 2:idx * 2 + 2],
9093 "disk/%d" % disk_index,
9094 minors[idx * 2], minors[idx * 2 + 1])
9095 disk_dev.mode = disk[constants.IDISK_MODE]
9096 disks.append(disk_dev)
9099 raise errors.ProgrammerError("Wrong template configuration")
9101 if template_name == constants.DT_FILE:
9103 elif template_name == constants.DT_SHARED_FILE:
9104 _req_shr_file_storage()
9106 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9107 if name_prefix is None:
9110 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9111 (name_prefix, base_index + i)
9112 for i in range(disk_count)])
9114 if template_name == constants.DT_PLAIN:
9116 def logical_id_fn(idx, _, disk):
9117 vg = disk.get(constants.IDISK_VG, vgname)
9118 return (vg, names[idx])
9120 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9122 lambda _, disk_index, disk: (file_driver,
9123 "%s/disk%d" % (file_storage_dir,
9125 elif template_name == constants.DT_BLOCK:
9127 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9128 disk[constants.IDISK_ADOPT])
9129 elif template_name == constants.DT_RBD:
9130 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9132 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9134 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9136 for idx, disk in enumerate(disk_info):
9137 disk_index = idx + base_index
9138 size = disk[constants.IDISK_SIZE]
9139 feedback_fn("* disk %s, size %s" %
9140 (disk_index, utils.FormatUnit(size, "h")))
9141 disks.append(objects.Disk(dev_type=dev_type, size=size,
9142 logical_id=logical_id_fn(idx, disk_index, disk),
9143 iv_name="disk/%d" % disk_index,
9144 mode=disk[constants.IDISK_MODE],
9150 def _GetInstanceInfoText(instance):
9151 Compute the text that should be added to the disk's metadata.
9154 return "originstname+%s" % instance.name
9157 def _CalcEta(time_taken, written, total_size):
9158 """Calculates the ETA based on size written and total size.
9160 @param time_taken: The time taken so far
9161 @param written: amount written so far
9162 @param total_size: The total size of data to be written
9163 @return: The remaining time in seconds
9166 avg_time = time_taken / float(written)
9167 return (total_size - written) * avg_time
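# Worked example (illustrative numbers): if 512 MiB out of 2048 MiB were
# written in 120 seconds, the average time per unit is 120.0 / 512, so
#   _CalcEta(120.0, 512, 2048) == (2048 - 512) * (120.0 / 512) == 360.0
# i.e. roughly six more minutes are estimated for the remaining data.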
9170 def _WipeDisks(lu, instance, disks=None):
9171 """Wipes instance disks.
9173 @type lu: L{LogicalUnit}
9174 @param lu: the logical unit on whose behalf we execute
9175 @type instance: L{objects.Instance}
9176 @param instance: the instance whose disks we should wipe
9177 @return: the success of the wipe
9180 node = instance.primary_node
9183 disks = [(idx, disk, 0)
9184 for (idx, disk) in enumerate(instance.disks)]
9186 for (_, device, _) in disks:
9187 lu.cfg.SetDiskID(device, node)
9189 logging.info("Pausing synchronization of disks of instance '%s'",
9191 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9192 (map(compat.snd, disks),
9195 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9197 for idx, success in enumerate(result.payload):
9199 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9200 " failed", idx, instance.name)
9203 for (idx, device, offset) in disks:
9204 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9205 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9207 int(min(constants.MAX_WIPE_CHUNK,
9208 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
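# Illustration (assuming the usual values MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB): a 102400 MiB disk would give a 10% value of
# 10240 MiB, so the chunk size is capped at 1024 MiB; for a 5120 MiB disk the
# 10% rule wins and the chunk size is 512 MiB.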
9212 start_time = time.time()
9217 info_text = (" (from %s to %s)" %
9218 (utils.FormatUnit(offset, "h"),
9219 utils.FormatUnit(size, "h")))
9221 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9223 logging.info("Wiping disk %d for instance %s on node %s using"
9224 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9226 while offset < size:
9227 wipe_size = min(wipe_chunk_size, size - offset)
9229 logging.debug("Wiping disk %d, offset %s, chunk %s",
9230 idx, offset, wipe_size)
9232 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9234 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9235 (idx, offset, wipe_size))
9239 if now - last_output >= 60:
9240 eta = _CalcEta(now - start_time, offset, size)
9241 lu.LogInfo(" - done: %.1f%% ETA: %s",
9242 offset / float(size) * 100, utils.FormatSeconds(eta))
9245 logging.info("Resuming synchronization of disks for instance '%s'",
9248 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9249 (map(compat.snd, disks),
9254 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9255 node, result.fail_msg)
9257 for idx, success in enumerate(result.payload):
9259 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9260 " failed", idx, instance.name)
9263 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9264 """Create all disks for an instance.
9266 This abstracts away some work from AddInstance.
9268 @type lu: L{LogicalUnit}
9269 @param lu: the logical unit on whose behalf we execute
9270 @type instance: L{objects.Instance}
9271 @param instance: the instance whose disks we should create
9273 @param to_skip: list of indices to skip
9274 @type target_node: string
9275 @param target_node: if passed, overrides the target node for creation
9277 @return: the success of the creation
9280 info = _GetInstanceInfoText(instance)
9281 if target_node is None:
9282 pnode = instance.primary_node
9283 all_nodes = instance.all_nodes
9288 if instance.disk_template in constants.DTS_FILEBASED:
9289 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9290 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9292 result.Raise("Failed to create directory '%s' on"
9293 " node %s" % (file_storage_dir, pnode))
9295 # Note: this needs to be kept in sync with adding of disks in
9296 # LUInstanceSetParams
9297 for idx, device in enumerate(instance.disks):
9298 if to_skip and idx in to_skip:
9300 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9302 for node in all_nodes:
9303 f_create = node == pnode
9304 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9307 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9308 """Remove all disks for an instance.
9310 This abstracts away some work from `AddInstance()` and
9311 `RemoveInstance()`. Note that in case some of the devices couldn't
9312 be removed, the removal will continue with the other ones (compare
9313 with `_CreateDisks()`).
9315 @type lu: L{LogicalUnit}
9316 @param lu: the logical unit on whose behalf we execute
9317 @type instance: L{objects.Instance}
9318 @param instance: the instance whose disks we should remove
9319 @type target_node: string
9320 @param target_node: used to override the node on which to remove the disks
9322 @return: the success of the removal
9325 logging.info("Removing block devices for instance %s", instance.name)
9328 ports_to_release = set()
9329 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9330 for (idx, device) in enumerate(anno_disks):
9332 edata = [(target_node, device)]
9334 edata = device.ComputeNodeTree(instance.primary_node)
9335 for node, disk in edata:
9336 lu.cfg.SetDiskID(disk, node)
9337 result = lu.rpc.call_blockdev_remove(node, disk)
9339 lu.LogWarning("Could not remove disk %s on node %s,"
9340 " continuing anyway: %s", idx, node, result.fail_msg)
9341 if not (result.offline and node != instance.primary_node):
9344 # if this is a DRBD disk, return its port to the pool
9345 if device.dev_type in constants.LDS_DRBD:
9346 ports_to_release.add(device.logical_id[2])
9348 if all_result or ignore_failures:
9349 for port in ports_to_release:
9350 lu.cfg.AddTcpUdpPort(port)
9352 if instance.disk_template in constants.DTS_FILEBASED:
9353 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9357 tgt = instance.primary_node
9358 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9360 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9361 file_storage_dir, tgt, result.fail_msg)
9367 def _ComputeDiskSizePerVG(disk_template, disks):
9368 """Compute disk size requirements in the volume group
9371 def _compute(disks, payload):
9372 """Universal algorithm.
9377 vgs[disk[constants.IDISK_VG]] = \
9378 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9382 # Required free disk space per volume group, as a function of the disk template
9384 constants.DT_DISKLESS: {},
9385 constants.DT_PLAIN: _compute(disks, 0),
9386 # 128 MB is added per disk for DRBD metadata
9387 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9388 constants.DT_FILE: {},
9389 constants.DT_SHARED_FILE: {},
9392 if disk_template not in req_size_dict:
9393 raise errors.ProgrammerError("Disk template '%s' size requirement"
9394 " is unknown" % disk_template)
9396 return req_size_dict[disk_template]
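# Illustrative example (hypothetical disk specs): for two disks of 1024 and
# 2048 MiB, both in volume group "xenvg", requesting constants.DT_DRBD8 yields
#   {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE}
# i.e. the summed data sizes plus one metadata allocation per disk, while the
# file-based and diskless templates return an empty dict.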
9399 def _FilterVmNodes(lu, nodenames):
9400 """Filters out non-vm_capable nodes from a list.
9402 @type lu: L{LogicalUnit}
9403 @param lu: the logical unit for which we check
9404 @type nodenames: list
9405 @param nodenames: the list of nodes on which we should check
9407 @return: the list of vm-capable nodes
9410 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9411 return [name for name in nodenames if name not in non_vm_nodes]
9414 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9415 """Hypervisor parameter validation.
9417 This function abstracts the hypervisor parameter validation to be
9418 used in both instance create and instance modify.
9420 @type lu: L{LogicalUnit}
9421 @param lu: the logical unit for which we check
9422 @type nodenames: list
9423 @param nodenames: the list of nodes on which we should check
9424 @type hvname: string
9425 @param hvname: the name of the hypervisor we should use
9426 @type hvparams: dict
9427 @param hvparams: the parameters which we need to check
9428 @raise errors.OpPrereqError: if the parameters are not valid
9431 nodenames = _FilterVmNodes(lu, nodenames)
9433 cluster = lu.cfg.GetClusterInfo()
9434 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9436 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9437 for node in nodenames:
9441 info.Raise("Hypervisor parameter validation failed on node %s" % node)
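# Typical call site (mirroring its use in LUInstanceCreate.CheckPrereq below):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# where nodenames contains the instance's primary node plus any secondaries.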
9444 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9445 """OS parameters validation.
9447 @type lu: L{LogicalUnit}
9448 @param lu: the logical unit for which we check
9449 @type required: boolean
9450 @param required: whether the validation should fail if the OS is not
9452 @type nodenames: list
9453 @param nodenames: the list of nodes on which we should check
9454 @type osname: string
9455 @param osname: the name of the OS we should use
9456 @type osparams: dict
9457 @param osparams: the parameters which we need to check
9458 @raise errors.OpPrereqError: if the parameters are not valid
9461 nodenames = _FilterVmNodes(lu, nodenames)
9462 result = lu.rpc.call_os_validate(nodenames, required, osname,
9463 [constants.OS_VALIDATE_PARAMETERS],
9465 for node, nres in result.items():
9466 # we don't check for offline cases since this should be run only
9467 # against the master node and/or an instance's nodes
9468 nres.Raise("OS Parameters validation failed on node %s" % node)
9469 if not nres.payload:
9470 lu.LogInfo("OS %s not found on node %s, validation skipped",
9474 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9475 """Wrapper around IAReqInstanceAlloc.
9477 @param op: The instance opcode
9478 @param disks: The computed disks
9479 @param nics: The computed nics
9480 @param beparams: The fully filled beparams
9482 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9485 spindle_use = beparams[constants.BE_SPINDLE_USE]
9486 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9487 disk_template=op.disk_template,
9490 vcpus=beparams[constants.BE_VCPUS],
9491 memory=beparams[constants.BE_MAXMEM],
9492 spindle_use=spindle_use,
9494 nics=[n.ToDict() for n in nics],
9495 hypervisor=op.hypervisor)
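# Used both by LUInstanceCreate._RunAllocator (one request per create opcode)
# and by LUInstanceMultiAlloc, which builds one request per instance and wraps
# them, roughly as:
#   reqs = [_CreateInstanceAllocRequest(op, disks, nics, beparams), ...]
#   req = iallocator.IAReqMultiInstanceAlloc(instances=reqs)
# (see the corresponding CheckPrereq implementations below).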
9498 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9499 """Computes the nics.
9501 @param op: The instance opcode
9502 @param cluster: Cluster configuration object
9503 @param default_ip: The default ip to assign
9504 @param cfg: An instance of the configuration object
9505 @param ec_id: Execution context ID
9507 @returns: The list of built-up NIC objects
9512 nic_mode_req = nic.get(constants.INIC_MODE, None)
9513 nic_mode = nic_mode_req
9514 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9515 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9517 net = nic.get(constants.INIC_NETWORK, None)
9518 link = nic.get(constants.NIC_LINK, None)
9519 ip = nic.get(constants.INIC_IP, None)
9521 if net is None or net.lower() == constants.VALUE_NONE:
9524 if nic_mode_req is not None or link is not None:
9525 raise errors.OpPrereqError("If network is given, no mode or link"
9526 " is allowed to be passed",
9529 # ip validity checks
9530 if ip is None or ip.lower() == constants.VALUE_NONE:
9532 elif ip.lower() == constants.VALUE_AUTO:
9533 if not op.name_check:
9534 raise errors.OpPrereqError("IP address set to auto but name checks"
9535 " have been skipped",
9539 # We defer pool operations until later, so that the iallocator has
9540 # filled in the instance's node(s)
9541 if ip.lower() == constants.NIC_IP_POOL:
9543 raise errors.OpPrereqError("if ip=pool, parameter network"
9544 " must be passed too",
9547 elif not netutils.IPAddress.IsValid(ip):
9548 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9553 # TODO: check the ip address for uniqueness
9554 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9555 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9558 # MAC address verification
9559 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9560 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9561 mac = utils.NormalizeAndValidateMac(mac)
9564 # TODO: We need to factor this out
9565 cfg.ReserveMAC(mac, ec_id)
9566 except errors.ReservationError:
9567 raise errors.OpPrereqError("MAC address %s already in use"
9568 " in cluster" % mac,
9569 errors.ECODE_NOTUNIQUE)
9571 # Build nic parameters
9574 nicparams[constants.NIC_MODE] = nic_mode
9576 nicparams[constants.NIC_LINK] = link
9578 check_params = cluster.SimpleFillNIC(nicparams)
9579 objects.NIC.CheckParameterSyntax(check_params)
9580 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9581 network=net, nicparams=nicparams))
9586 def _ComputeDisks(op, default_vg):
9587 """Computes the instance disks.
9589 @param op: The instance opcode
9590 @param default_vg: The default_vg to assume
9592 @return: The computed disks
9596 for disk in op.disks:
9597 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9598 if mode not in constants.DISK_ACCESS_SET:
9599 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9600 mode, errors.ECODE_INVAL)
9601 size = disk.get(constants.IDISK_SIZE, None)
9603 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9606 except (TypeError, ValueError):
9607 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9610 data_vg = disk.get(constants.IDISK_VG, default_vg)
9612 constants.IDISK_SIZE: size,
9613 constants.IDISK_MODE: mode,
9614 constants.IDISK_VG: data_vg,
9616 if constants.IDISK_METAVG in disk:
9617 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9618 if constants.IDISK_ADOPT in disk:
9619 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9620 disks.append(new_disk)
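# Illustrative example (hypothetical opcode values): an input disk spec of
#   {constants.IDISK_SIZE: "10240", constants.IDISK_MODE: constants.DISK_RDWR}
# is normalized (size coerced to an integer) into
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: default_vg}
# with the metavg and adopt keys copied through only when present in the input.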
9625 def _ComputeFullBeParams(op, cluster):
9626 """Computes the full beparams.
9628 @param op: The instance opcode
9629 @param cluster: The cluster config object
9631 @return: The fully filled beparams
9634 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9635 for param, value in op.beparams.iteritems():
9636 if value == constants.VALUE_AUTO:
9637 op.beparams[param] = default_beparams[param]
9638 objects.UpgradeBeParams(op.beparams)
9639 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9640 return cluster.SimpleFillBE(op.beparams)
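# Illustrative example (hypothetical values): if op.beparams is
#   {constants.BE_VCPUS: constants.VALUE_AUTO, constants.BE_MAXMEM: 512}
# the "auto" vcpus entry is first replaced with the cluster default, the dict
# is upgraded and type-checked, and SimpleFillBE then merges in the remaining
# cluster defaults to produce the complete beparams used for the instance.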
9643 class LUInstanceCreate(LogicalUnit):
9644 """Create an instance.
9647 HPATH = "instance-add"
9648 HTYPE = constants.HTYPE_INSTANCE
9651 def CheckArguments(self):
9655 # do not require name_check to ease forward/backward compatibility
9657 if self.op.no_install and self.op.start:
9658 self.LogInfo("No-installation mode selected, disabling startup")
9659 self.op.start = False
9660 # validate/normalize the instance name
9661 self.op.instance_name = \
9662 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9664 if self.op.ip_check and not self.op.name_check:
9665 # TODO: make the ip check more flexible and not depend on the name check
9666 raise errors.OpPrereqError("Cannot do IP address check without a name"
9667 " check", errors.ECODE_INVAL)
9669 # check nics' parameter names
9670 for nic in self.op.nics:
9671 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9673 # check disks. parameter names and consistent adopt/no-adopt strategy
9674 has_adopt = has_no_adopt = False
9675 for disk in self.op.disks:
9676 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9677 if constants.IDISK_ADOPT in disk:
9681 if has_adopt and has_no_adopt:
9682 raise errors.OpPrereqError("Either all disks are adopted or none is",
9685 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9686 raise errors.OpPrereqError("Disk adoption is not supported for the"
9687 " '%s' disk template" %
9688 self.op.disk_template,
9690 if self.op.iallocator is not None:
9691 raise errors.OpPrereqError("Disk adoption not allowed with an"
9692 " iallocator script", errors.ECODE_INVAL)
9693 if self.op.mode == constants.INSTANCE_IMPORT:
9694 raise errors.OpPrereqError("Disk adoption not allowed for"
9695 " instance import", errors.ECODE_INVAL)
9697 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9698 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9699 " but no 'adopt' parameter given" %
9700 self.op.disk_template,
9703 self.adopt_disks = has_adopt
9705 # instance name verification
9706 if self.op.name_check:
9707 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9708 self.op.instance_name = self.hostname1.name
9709 # used in CheckPrereq for ip ping check
9710 self.check_ip = self.hostname1.ip
9712 self.check_ip = None
9714 # file storage checks
9715 if (self.op.file_driver and
9716 self.op.file_driver not in constants.FILE_DRIVER):
9717 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9718 self.op.file_driver, errors.ECODE_INVAL)
9720 if self.op.disk_template == constants.DT_FILE:
9721 opcodes.RequireFileStorage()
9722 elif self.op.disk_template == constants.DT_SHARED_FILE:
9723 opcodes.RequireSharedFileStorage()
9725 ### Node/iallocator related checks
9726 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9728 if self.op.pnode is not None:
9729 if self.op.disk_template in constants.DTS_INT_MIRROR:
9730 if self.op.snode is None:
9731 raise errors.OpPrereqError("The networked disk templates need"
9732 " a mirror node", errors.ECODE_INVAL)
9734 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9736 self.op.snode = None
9738 self._cds = _GetClusterDomainSecret()
9740 if self.op.mode == constants.INSTANCE_IMPORT:
9741 # On import force_variant must be True, because if we forced it at
9742 # initial install, our only chance when importing it back is that it
9744 self.op.force_variant = True
9746 if self.op.no_install:
9747 self.LogInfo("No-installation mode has no effect during import")
9749 elif self.op.mode == constants.INSTANCE_CREATE:
9750 if self.op.os_type is None:
9751 raise errors.OpPrereqError("No guest OS specified",
9753 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9754 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9755 " installation" % self.op.os_type,
9757 if self.op.disk_template is None:
9758 raise errors.OpPrereqError("No disk template specified",
9761 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9762 # Check handshake to ensure both clusters have the same domain secret
9763 src_handshake = self.op.source_handshake
9764 if not src_handshake:
9765 raise errors.OpPrereqError("Missing source handshake",
9768 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9771 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9774 # Load and check source CA
9775 self.source_x509_ca_pem = self.op.source_x509_ca
9776 if not self.source_x509_ca_pem:
9777 raise errors.OpPrereqError("Missing source X509 CA",
9781 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9783 except OpenSSL.crypto.Error, err:
9784 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9785 (err, ), errors.ECODE_INVAL)
9787 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9788 if errcode is not None:
9789 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9792 self.source_x509_ca = cert
9794 src_instance_name = self.op.source_instance_name
9795 if not src_instance_name:
9796 raise errors.OpPrereqError("Missing source instance name",
9799 self.source_instance_name = \
9800 netutils.GetHostname(name=src_instance_name).name
9803 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9804 self.op.mode, errors.ECODE_INVAL)
9806 def ExpandNames(self):
9807 """ExpandNames for CreateInstance.
9809 Figure out the right locks for instance creation.
9812 self.needed_locks = {}
9814 instance_name = self.op.instance_name
9815 # this is just a preventive check, but someone might still add this
9816 # instance in the meantime, and creation will fail at lock-add time
9817 if instance_name in self.cfg.GetInstanceList():
9818 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9819 instance_name, errors.ECODE_EXISTS)
9821 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9823 if self.op.iallocator:
9824 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9825 # specifying a group on instance creation and then selecting nodes from
9827 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9828 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9830 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9831 nodelist = [self.op.pnode]
9832 if self.op.snode is not None:
9833 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9834 nodelist.append(self.op.snode)
9835 self.needed_locks[locking.LEVEL_NODE] = nodelist
9836 # Lock resources of instance's primary and secondary nodes (copy to
9837 # prevent accidental modification)
9838 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9840 # in case of import lock the source node too
9841 if self.op.mode == constants.INSTANCE_IMPORT:
9842 src_node = self.op.src_node
9843 src_path = self.op.src_path
9845 if src_path is None:
9846 self.op.src_path = src_path = self.op.instance_name
9848 if src_node is None:
9849 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9850 self.op.src_node = None
9851 if os.path.isabs(src_path):
9852 raise errors.OpPrereqError("Importing an instance from a path"
9853 " requires a source node option",
9856 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9857 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9858 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9859 if not os.path.isabs(src_path):
9860 self.op.src_path = src_path = \
9861 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9863 def _RunAllocator(self):
9864 """Run the allocator based on input opcode.
9867 #TODO Export network to iallocator so that it chooses a pnode
9868 # in a nodegroup that has the desired network connected to
9869 req = _CreateInstanceAllocRequest(self.op, self.disks,
9870 self.nics, self.be_full)
9871 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9873 ial.Run(self.op.iallocator)
9876 raise errors.OpPrereqError("Can't compute nodes using"
9877 " iallocator '%s': %s" %
9878 (self.op.iallocator, ial.info),
9880 self.op.pnode = ial.result[0]
9881 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9882 self.op.instance_name, self.op.iallocator,
9883 utils.CommaJoin(ial.result))
9885 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9887 if req.RequiredNodes() == 2:
9888 self.op.snode = ial.result[1]
9890 def BuildHooksEnv(self):
9893 This runs on master, primary and secondary nodes of the instance.
9897 "ADD_MODE": self.op.mode,
9899 if self.op.mode == constants.INSTANCE_IMPORT:
9900 env["SRC_NODE"] = self.op.src_node
9901 env["SRC_PATH"] = self.op.src_path
9902 env["SRC_IMAGES"] = self.src_images
9904 env.update(_BuildInstanceHookEnv(
9905 name=self.op.instance_name,
9906 primary_node=self.op.pnode,
9907 secondary_nodes=self.secondaries,
9908 status=self.op.start,
9909 os_type=self.op.os_type,
9910 minmem=self.be_full[constants.BE_MINMEM],
9911 maxmem=self.be_full[constants.BE_MAXMEM],
9912 vcpus=self.be_full[constants.BE_VCPUS],
9913 nics=_NICListToTuple(self, self.nics),
9914 disk_template=self.op.disk_template,
9915 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9916 for d in self.disks],
9919 hypervisor_name=self.op.hypervisor,
9925 def BuildHooksNodes(self):
9926 """Build hooks nodes.
9929 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9932 def _ReadExportInfo(self):
9933 """Reads the export information from disk.
9935 It will override the opcode source node and path with the actual
9936 information, if these two were not specified before.
9938 @return: the export information
9941 assert self.op.mode == constants.INSTANCE_IMPORT
9943 src_node = self.op.src_node
9944 src_path = self.op.src_path
9946 if src_node is None:
9947 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9948 exp_list = self.rpc.call_export_list(locked_nodes)
9950 for node in exp_list:
9951 if exp_list[node].fail_msg:
9953 if src_path in exp_list[node].payload:
9955 self.op.src_node = src_node = node
9956 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9960 raise errors.OpPrereqError("No export found for relative path %s" %
9961 src_path, errors.ECODE_INVAL)
9963 _CheckNodeOnline(self, src_node)
9964 result = self.rpc.call_export_info(src_node, src_path)
9965 result.Raise("No export or invalid export found in dir %s" % src_path)
9967 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9968 if not export_info.has_section(constants.INISECT_EXP):
9969 raise errors.ProgrammerError("Corrupted export config",
9970 errors.ECODE_ENVIRON)
9972 ei_version = export_info.get(constants.INISECT_EXP, "version")
9973 if int(ei_version) != constants.EXPORT_VERSION:
9974 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9975 (ei_version, constants.EXPORT_VERSION),
9976 errors.ECODE_ENVIRON)
9979 def _ReadExportParams(self, einfo):
9980 """Use export parameters as defaults.
9982 In case the opcode doesn't specify (as in override) some instance
9983 parameters, then try to use them from the export information, if
9987 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9989 if self.op.disk_template is None:
9990 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9991 self.op.disk_template = einfo.get(constants.INISECT_INS,
9993 if self.op.disk_template not in constants.DISK_TEMPLATES:
9994 raise errors.OpPrereqError("Disk template specified in configuration"
9995 " file is not one of the allowed values:"
9997 " ".join(constants.DISK_TEMPLATES),
10000 raise errors.OpPrereqError("No disk template specified and the export"
10001 " is missing the disk_template information",
10002 errors.ECODE_INVAL)
10004 if not self.op.disks:
10006 # TODO: import the disk iv_name too
10007 for idx in range(constants.MAX_DISKS):
10008 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10009 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10010 disks.append({constants.IDISK_SIZE: disk_sz})
10011 self.op.disks = disks
10012 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10013 raise errors.OpPrereqError("No disk info specified and the export"
10014 " is missing the disk information",
10015 errors.ECODE_INVAL)
10017 if not self.op.nics:
10019 for idx in range(constants.MAX_NICS):
10020 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10022 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10023 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10028 self.op.nics = nics
10030 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10031 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10033 if (self.op.hypervisor is None and
10034 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10035 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10037 if einfo.has_section(constants.INISECT_HYP):
10038 # use the export parameters but do not override the ones
10039 # specified by the user
10040 for name, value in einfo.items(constants.INISECT_HYP):
10041 if name not in self.op.hvparams:
10042 self.op.hvparams[name] = value
10044 if einfo.has_section(constants.INISECT_BEP):
10045 # use the parameters, without overriding
10046 for name, value in einfo.items(constants.INISECT_BEP):
10047 if name not in self.op.beparams:
10048 self.op.beparams[name] = value
10049 # Compatibility for the old "memory" be param
10050 if name == constants.BE_MEMORY:
10051 if constants.BE_MAXMEM not in self.op.beparams:
10052 self.op.beparams[constants.BE_MAXMEM] = value
10053 if constants.BE_MINMEM not in self.op.beparams:
10054 self.op.beparams[constants.BE_MINMEM] = value
10056 # try to read the parameters old style, from the main section
10057 for name in constants.BES_PARAMETERS:
10058 if (name not in self.op.beparams and
10059 einfo.has_option(constants.INISECT_INS, name)):
10060 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10062 if einfo.has_section(constants.INISECT_OSP):
10063 # use the parameters, without overriding
10064 for name, value in einfo.items(constants.INISECT_OSP):
10065 if name not in self.op.osparams:
10066 self.op.osparams[name] = value
10068 def _RevertToDefaults(self, cluster):
10069 """Revert the instance parameters to the default values.
10073 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10074 for name in self.op.hvparams.keys():
10075 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10076 del self.op.hvparams[name]
10078 be_defs = cluster.SimpleFillBE({})
10079 for name in self.op.beparams.keys():
10080 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10081 del self.op.beparams[name]
10083 nic_defs = cluster.SimpleFillNIC({})
10084 for nic in self.op.nics:
10085 for name in constants.NICS_PARAMETERS:
10086 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10089 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10090 for name in self.op.osparams.keys():
10091 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10092 del self.op.osparams[name]
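# Illustrative example (hypothetical parameter): if the opcode sets an hvparam
# such as "kernel_path" to exactly the value SimpleFillHV would yield anyway,
# the key is deleted here, so the instance keeps tracking the cluster/OS
# default instead of pinning a private copy. This only runs when
# op.identify_defaults is set (see CheckPrereq below).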
10094 def _CalculateFileStorageDir(self):
10095 """Calculate final instance file storage dir.
10098 # file storage dir calculation/check
10099 self.instance_file_storage_dir = None
10100 if self.op.disk_template in constants.DTS_FILEBASED:
10101 # build the full file storage dir path
10104 if self.op.disk_template == constants.DT_SHARED_FILE:
10105 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10107 get_fsd_fn = self.cfg.GetFileStorageDir
10109 cfg_storagedir = get_fsd_fn()
10110 if not cfg_storagedir:
10111 raise errors.OpPrereqError("Cluster file storage dir not defined",
10112 errors.ECODE_STATE)
10113 joinargs.append(cfg_storagedir)
10115 if self.op.file_storage_dir is not None:
10116 joinargs.append(self.op.file_storage_dir)
10118 joinargs.append(self.op.instance_name)
10120 # pylint: disable=W0142
10121 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
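# Illustrative example (hypothetical paths): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir = "mygroup" and an instance
# named "inst1.example.com", the result is
# "/srv/ganeti/file-storage/mygroup/inst1.example.com"; when no
# file_storage_dir is given in the opcode, that middle component is omitted.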
10123 def CheckPrereq(self): # pylint: disable=R0914
10124 """Check prerequisites.
10127 self._CalculateFileStorageDir()
10129 if self.op.mode == constants.INSTANCE_IMPORT:
10130 export_info = self._ReadExportInfo()
10131 self._ReadExportParams(export_info)
10132 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10134 self._old_instance_name = None
10136 if (not self.cfg.GetVGName() and
10137 self.op.disk_template not in constants.DTS_NOT_LVM):
10138 raise errors.OpPrereqError("Cluster does not support lvm-based"
10139 " instances", errors.ECODE_STATE)
10141 if (self.op.hypervisor is None or
10142 self.op.hypervisor == constants.VALUE_AUTO):
10143 self.op.hypervisor = self.cfg.GetHypervisorType()
10145 cluster = self.cfg.GetClusterInfo()
10146 enabled_hvs = cluster.enabled_hypervisors
10147 if self.op.hypervisor not in enabled_hvs:
10148 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10150 (self.op.hypervisor, ",".join(enabled_hvs)),
10151 errors.ECODE_STATE)
10153 # Check tag validity
10154 for tag in self.op.tags:
10155 objects.TaggableObject.ValidateTag(tag)
10157 # check hypervisor parameter syntax (locally)
10158 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10159 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10161 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10162 hv_type.CheckParameterSyntax(filled_hvp)
10163 self.hv_full = filled_hvp
10164 # check that we don't specify global parameters on an instance
10165 _CheckGlobalHvParams(self.op.hvparams)
10167 # fill and remember the beparams dict
10168 self.be_full = _ComputeFullBeParams(self.op, cluster)
10170 # build os parameters
10171 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10173 # now that hvp/bep are in final format, let's reset to defaults,
10175 if self.op.identify_defaults:
10176 self._RevertToDefaults(cluster)
10179 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10180 self.proc.GetECId())
10182 # disk checks/pre-build
10183 default_vg = self.cfg.GetVGName()
10184 self.disks = _ComputeDisks(self.op, default_vg)
10186 if self.op.mode == constants.INSTANCE_IMPORT:
10188 for idx in range(len(self.disks)):
10189 option = "disk%d_dump" % idx
10190 if export_info.has_option(constants.INISECT_INS, option):
10191 # FIXME: are the old os-es, disk sizes, etc. useful?
10192 export_name = export_info.get(constants.INISECT_INS, option)
10193 image = utils.PathJoin(self.op.src_path, export_name)
10194 disk_images.append(image)
10196 disk_images.append(False)
10198 self.src_images = disk_images
10200 if self.op.instance_name == self._old_instance_name:
10201 for idx, nic in enumerate(self.nics):
10202 if nic.mac == constants.VALUE_AUTO:
10203 nic_mac_ini = "nic%d_mac" % idx
10204 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10206 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10208 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10209 if self.op.ip_check:
10210 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10211 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10212 (self.check_ip, self.op.instance_name),
10213 errors.ECODE_NOTUNIQUE)
10215 #### mac address generation
10216 # By generating here the mac address both the allocator and the hooks get
10217 # the real final mac address rather than the 'auto' or 'generate' value.
10218 # There is a race condition between the generation and the instance object
10219 # creation, which means that we know the mac is valid now, but we're not
10220 # sure it will be when we actually add the instance. If things go bad
10221 # adding the instance will abort because of a duplicate mac, and the
10222 # creation job will fail.
10223 for nic in self.nics:
10224 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10225 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10229 if self.op.iallocator is not None:
10230 self._RunAllocator()
10232 # Release all unneeded node locks
10233 _ReleaseLocks(self, locking.LEVEL_NODE,
10234 keep=filter(None, [self.op.pnode, self.op.snode,
10235 self.op.src_node]))
10236 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10237 keep=filter(None, [self.op.pnode, self.op.snode,
10238 self.op.src_node]))
10240 #### node related checks
10242 # check primary node
10243 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10244 assert self.pnode is not None, \
10245 "Cannot retrieve locked node %s" % self.op.pnode
10247 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10248 pnode.name, errors.ECODE_STATE)
10250 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10251 pnode.name, errors.ECODE_STATE)
10252 if not pnode.vm_capable:
10253 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10254 " '%s'" % pnode.name, errors.ECODE_STATE)
10256 self.secondaries = []
10258 # Fill in any IPs from IP pools. This must happen here, because we need to
10259 # know the nic's primary node, as specified by the iallocator
10260 for idx, nic in enumerate(self.nics):
10262 if net is not None:
10263 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10264 if netparams is None:
10265 raise errors.OpPrereqError("No netparams found for network"
10266 " %s. Propably not connected to"
10267 " node's %s nodegroup" %
10268 (net, self.pnode.name),
10269 errors.ECODE_INVAL)
10270 self.LogInfo("NIC/%d inherits netparams %s" %
10271 (idx, netparams.values()))
10272 nic.nicparams = dict(netparams)
10273 if nic.ip is not None:
10274 if nic.ip.lower() == constants.NIC_IP_POOL:
10276 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10277 except errors.ReservationError:
10278 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10279 " from the address pool" % idx,
10280 errors.ECODE_STATE)
10281 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10284 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10285 except errors.ReservationError:
10286 raise errors.OpPrereqError("IP address %s already in use"
10287 " or does not belong to network %s" %
10289 errors.ECODE_NOTUNIQUE)
10291 # net is None, ip None or given
10292 if self.op.conflicts_check:
10293 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10295 # mirror node verification
10296 if self.op.disk_template in constants.DTS_INT_MIRROR:
10297 if self.op.snode == pnode.name:
10298 raise errors.OpPrereqError("The secondary node cannot be the"
10299 " primary node", errors.ECODE_INVAL)
10300 _CheckNodeOnline(self, self.op.snode)
10301 _CheckNodeNotDrained(self, self.op.snode)
10302 _CheckNodeVmCapable(self, self.op.snode)
10303 self.secondaries.append(self.op.snode)
10305 snode = self.cfg.GetNodeInfo(self.op.snode)
10306 if pnode.group != snode.group:
10307 self.LogWarning("The primary and secondary nodes are in two"
10308 " different node groups; the disk parameters"
10309 " from the first disk's node group will be"
10312 nodenames = [pnode.name] + self.secondaries
10314 # Verify instance specs
10315 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10317 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10318 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10319 constants.ISPEC_DISK_COUNT: len(self.disks),
10320 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE] for disk in self.disks],
10321 constants.ISPEC_NIC_COUNT: len(self.nics),
10322 constants.ISPEC_SPINDLE_USE: spindle_use,
10325 group_info = self.cfg.GetNodeGroup(pnode.group)
10326 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10327 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10328 if not self.op.ignore_ipolicy and res:
10329 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10330 (pnode.group, group_info.name, utils.CommaJoin(res)))
10331 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10333 if not self.adopt_disks:
10334 if self.op.disk_template == constants.DT_RBD:
10335 # _CheckRADOSFreeSpace() is just a placeholder.
10336 # Any function that checks prerequisites can be placed here.
10337 # Check if there is enough space on the RADOS cluster.
10338 _CheckRADOSFreeSpace()
10340 # Check lv size requirements, if not adopting
10341 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10342 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10344 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10345 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10346 disk[constants.IDISK_ADOPT])
10347 for disk in self.disks])
10348 if len(all_lvs) != len(self.disks):
10349 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10350 errors.ECODE_INVAL)
10351 for lv_name in all_lvs:
10353 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10354 # to ReserveLV use the same syntax
10355 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10356 except errors.ReservationError:
10357 raise errors.OpPrereqError("LV named %s used by another instance" %
10358 lv_name, errors.ECODE_NOTUNIQUE)
10360 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10361 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10363 node_lvs = self.rpc.call_lv_list([pnode.name],
10364 vg_names.payload.keys())[pnode.name]
10365 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10366 node_lvs = node_lvs.payload
10368 delta = all_lvs.difference(node_lvs.keys())
10370 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10371 utils.CommaJoin(delta),
10372 errors.ECODE_INVAL)
10373 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10375 raise errors.OpPrereqError("Online logical volumes found, cannot"
10376 " adopt: %s" % utils.CommaJoin(online_lvs),
10377 errors.ECODE_STATE)
10378 # update the size of disk based on what is found
10379 for dsk in self.disks:
10380 dsk[constants.IDISK_SIZE] = \
10381 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10382 dsk[constants.IDISK_ADOPT])][0]))
10384 elif self.op.disk_template == constants.DT_BLOCK:
10385 # Normalize and de-duplicate device paths
10386 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10387 for disk in self.disks])
10388 if len(all_disks) != len(self.disks):
10389 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10390 errors.ECODE_INVAL)
10391 baddisks = [d for d in all_disks
10392 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10394 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10395 " cannot be adopted" %
10396 (utils.CommaJoin(baddisks),
10397 constants.ADOPTABLE_BLOCKDEV_ROOT),
10398 errors.ECODE_INVAL)
10400 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10401 list(all_disks))[pnode.name]
10402 node_disks.Raise("Cannot get block device information from node %s" %
10404 node_disks = node_disks.payload
10405 delta = all_disks.difference(node_disks.keys())
10407 raise errors.OpPrereqError("Missing block device(s): %s" %
10408 utils.CommaJoin(delta),
10409 errors.ECODE_INVAL)
10410 for dsk in self.disks:
10411 dsk[constants.IDISK_SIZE] = \
10412 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10414 # Verify instance specs
10415 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10417 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10418 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10419 constants.ISPEC_DISK_COUNT: len(self.disks),
10420 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10421 for disk in self.disks],
10422 constants.ISPEC_NIC_COUNT: len(self.nics),
10423 constants.ISPEC_SPINDLE_USE: spindle_use,
10426 group_info = self.cfg.GetNodeGroup(pnode.group)
10427 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10428 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10429 if not self.op.ignore_ipolicy and res:
10430 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10431 " policy: %s") % (pnode.group,
10432 utils.CommaJoin(res)),
10433 errors.ECODE_INVAL)
10435 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10437 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10438 # check OS parameters (remotely)
10439 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10441 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10443 # memory check on primary node
10444 #TODO(dynmem): use MINMEM for checking
10446 _CheckNodeFreeMemory(self, self.pnode.name,
10447 "creating instance %s" % self.op.instance_name,
10448 self.be_full[constants.BE_MAXMEM],
10449 self.op.hypervisor)
10451 self.dry_run_result = list(nodenames)
10453 def Exec(self, feedback_fn):
10454 """Create and add the instance to the cluster.
10457 instance = self.op.instance_name
10458 pnode_name = self.pnode.name
10460 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10461 self.owned_locks(locking.LEVEL_NODE)), \
10462 "Node locks differ from node resource locks"
10464 ht_kind = self.op.hypervisor
10465 if ht_kind in constants.HTS_REQ_PORT:
10466 network_port = self.cfg.AllocatePort()
10468 network_port = None
10470 # This is ugly, but we have a chicken-and-egg problem here
10471 # We can only take the group disk parameters, as the instance
10472 # has no disks yet (we are generating them right here).
10473 node = self.cfg.GetNodeInfo(pnode_name)
10474 nodegroup = self.cfg.GetNodeGroup(node.group)
10475 disks = _GenerateDiskTemplate(self,
10476 self.op.disk_template,
10477 instance, pnode_name,
10480 self.instance_file_storage_dir,
10481 self.op.file_driver,
10484 self.cfg.GetGroupDiskParams(nodegroup))
10486 iobj = objects.Instance(name=instance, os=self.op.os_type,
10487 primary_node=pnode_name,
10488 nics=self.nics, disks=disks,
10489 disk_template=self.op.disk_template,
10490 admin_state=constants.ADMINST_DOWN,
10491 network_port=network_port,
10492 beparams=self.op.beparams,
10493 hvparams=self.op.hvparams,
10494 hypervisor=self.op.hypervisor,
10495 osparams=self.op.osparams,
10499 for tag in self.op.tags:
10502 if self.adopt_disks:
10503 if self.op.disk_template == constants.DT_PLAIN:
10504 # rename LVs to the newly-generated names; we need to construct
10505 # 'fake' LV disks with the old data, plus the new unique_id
10506 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10508 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10509 rename_to.append(t_dsk.logical_id)
10510 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10511 self.cfg.SetDiskID(t_dsk, pnode_name)
10512 result = self.rpc.call_blockdev_rename(pnode_name,
10513 zip(tmp_disks, rename_to))
10514 result.Raise("Failed to rename adoped LVs")
10516 feedback_fn("* creating instance disks...")
10518 _CreateDisks(self, iobj)
10519 except errors.OpExecError:
10520 self.LogWarning("Device creation failed, reverting...")
10522 _RemoveDisks(self, iobj)
10524 self.cfg.ReleaseDRBDMinors(instance)
10527 feedback_fn("adding instance %s to cluster config" % instance)
10529 self.cfg.AddInstance(iobj, self.proc.GetECId())
10531 # Declare that we don't want to remove the instance lock anymore, as we've
10532 # added the instance to the config
10533 del self.remove_locks[locking.LEVEL_INSTANCE]
10535 if self.op.mode == constants.INSTANCE_IMPORT:
10536 # Release unused nodes
10537 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10539 # Release all nodes
10540 _ReleaseLocks(self, locking.LEVEL_NODE)
10543 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10544 feedback_fn("* wiping instance disks...")
10546 _WipeDisks(self, iobj)
10547 except errors.OpExecError, err:
10548 logging.exception("Wiping disks failed")
10549 self.LogWarning("Wiping instance disks failed (%s)", err)
10553 # Something is already wrong with the disks, don't do anything else
10555 elif self.op.wait_for_sync:
10556 disk_abort = not _WaitForSync(self, iobj)
10557 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10558 # make sure the disks are not degraded (still sync-ing is ok)
10559 feedback_fn("* checking mirrors status")
10560 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10565 _RemoveDisks(self, iobj)
10566 self.cfg.RemoveInstance(iobj.name)
10567 # Make sure the instance lock gets removed
10568 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10569 raise errors.OpExecError("There are some degraded disks for"
10572 # Release all node resource locks
10573 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10575 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10576 # we need to set the disks ID to the primary node, since the
10577 # preceding code might or might not have done it, depending on
10578 # disk template and other options
10579 for disk in iobj.disks:
10580 self.cfg.SetDiskID(disk, pnode_name)
10581 if self.op.mode == constants.INSTANCE_CREATE:
10582 if not self.op.no_install:
10583 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10584 not self.op.wait_for_sync)
10586 feedback_fn("* pausing disk sync to install instance OS")
10587 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10590 for idx, success in enumerate(result.payload):
10592 logging.warn("pause-sync of instance %s for disk %d failed",
10595 feedback_fn("* running the instance OS create scripts...")
10596 # FIXME: pass debug option from opcode to backend
10598 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10599 self.op.debug_level)
10601 feedback_fn("* resuming disk sync")
10602 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10605 for idx, success in enumerate(result.payload):
10607 logging.warn("resume-sync of instance %s for disk %d failed",
10610 os_add_result.Raise("Could not add os for instance %s"
10611 " on node %s" % (instance, pnode_name))
10614 if self.op.mode == constants.INSTANCE_IMPORT:
10615 feedback_fn("* running the instance OS import scripts...")
10619 for idx, image in enumerate(self.src_images):
10623 # FIXME: pass debug option from opcode to backend
10624 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10625 constants.IEIO_FILE, (image, ),
10626 constants.IEIO_SCRIPT,
10627 (iobj.disks[idx], idx),
10629 transfers.append(dt)
10632 masterd.instance.TransferInstanceData(self, feedback_fn,
10633 self.op.src_node, pnode_name,
10634 self.pnode.secondary_ip,
10636 if not compat.all(import_result):
10637 self.LogWarning("Some disks for instance %s on node %s were not"
10638 " imported successfully" % (instance, pnode_name))
10640 rename_from = self._old_instance_name
10642 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10643 feedback_fn("* preparing remote import...")
10644 # The source cluster will stop the instance before attempting to make
10645 # a connection. In some cases stopping an instance can take a long
10646 # time, hence the shutdown timeout is added to the connection
10648 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10649 self.op.source_shutdown_timeout)
10650 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10652 assert iobj.primary_node == self.pnode.name
10654 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10655 self.source_x509_ca,
10656 self._cds, timeouts)
10657 if not compat.all(disk_results):
10658 # TODO: Should the instance still be started, even if some disks
10659 # failed to import (valid for local imports, too)?
10660 self.LogWarning("Some disks for instance %s on node %s were not"
10661 " imported successfully" % (instance, pnode_name))
10663 rename_from = self.source_instance_name
10666 # also checked in the prereq part
10667 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10670 # Run rename script on newly imported instance
10671 assert iobj.name == instance
10672 feedback_fn("Running rename script for %s" % instance)
10673 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10675 self.op.debug_level)
10676 if result.fail_msg:
10677 self.LogWarning("Failed to run rename script for %s on node"
10678 " %s: %s" % (instance, pnode_name, result.fail_msg))
10680 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10683 iobj.admin_state = constants.ADMINST_UP
10684 self.cfg.Update(iobj, feedback_fn)
10685 logging.info("Starting instance %s on node %s", instance, pnode_name)
10686 feedback_fn("* starting instance...")
10687 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10689 result.Raise("Could not start instance")
10691 return list(iobj.all_nodes)
10694 class LUInstanceMultiAlloc(NoHooksLU):
10695 """Allocates multiple instances at the same time.
10700 def CheckArguments(self):
10701 """Check arguments.
10705 for inst in self.op.instances:
10706 if inst.iallocator is not None:
10707 raise errors.OpPrereqError("iallocator are not allowed to be set on"
10708 " instance objects", errors.ECODE_INVAL)
10709 nodes.append(bool(inst.pnode))
10710 if inst.disk_template in constants.DTS_INT_MIRROR:
10711 nodes.append(bool(inst.snode))
10713 has_nodes = compat.any(nodes)
10714 if compat.all(nodes) ^ has_nodes:
10715 raise errors.OpPrereqError("There are instance objects providing"
10716 " pnode/snode while others do not",
10717 errors.ECODE_INVAL)
10719 if self.op.iallocator is None:
10720 default_iallocator = self.cfg.GetDefaultIAllocator()
10721 if default_iallocator and has_nodes:
10722 self.op.iallocator = default_iallocator
10724 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10725 " given and no cluster-wide default"
10726 " iallocator found; please specify either"
10727 " an iallocator or nodes on the instances"
10728 " or set a cluster-wide default iallocator",
10729 errors.ECODE_INVAL)
10731 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10733 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10734 utils.CommaJoin(dups), errors.ECODE_INVAL)
10736 def ExpandNames(self):
10737 """Calculate the locks.
10740 self.share_locks = _ShareAll()
10741 self.needed_locks = {}
10743 if self.op.iallocator:
10744 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10745 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10748 for inst in self.op.instances:
10749 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10750 nodeslist.append(inst.pnode)
10751 if inst.snode is not None:
10752 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10753 nodeslist.append(inst.snode)
10755 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10756 # Lock resources of instance's primary and secondary nodes (copy to
10757 # prevent accidental modification)
10758 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10760 def CheckPrereq(self):
10761 """Check prerequisite.
10764 cluster = self.cfg.GetClusterInfo()
10765 default_vg = self.cfg.GetVGName()
10766 ec_id = self.proc.GetECId()
10768 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10769 _ComputeNics(op, cluster, None,
10771 _ComputeFullBeParams(op, cluster))
10772 for op in self.op.instances]
10774 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10775 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10777 ial.Run(self.op.iallocator)
10779 if not ial.success:
10780 raise errors.OpPrereqError("Can't compute nodes using"
10781 " iallocator '%s': %s" %
10782 (self.op.iallocator, ial.info),
10783 errors.ECODE_NORES)
10785 self.ia_result = ial.result
10787 if self.op.dry_run:
10788 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10789 constants.JOB_IDS_KEY: [],
10792 def _ConstructPartialResult(self):
10793 """Contructs the partial result.
10796 (allocatable, failed) = self.ia_result
10798 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10799 map(compat.fst, allocatable),
10800 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10803 def Exec(self, feedback_fn):
10804 """Executes the opcode.
10807 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10808 (allocatable, failed) = self.ia_result
10811 for (name, nodes) in allocatable:
10812 op = op2inst.pop(name)
10815 (op.pnode, op.snode) = nodes
10817 (op.pnode,) = nodes
10821 missing = set(op2inst.keys()) - set(failed)
10822 assert not missing, \
10823 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10825 return ResultWithJobs(jobs, **self._ConstructPartialResult())
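# Illustrative result shape (hypothetical instance names): with two allocatable
# instances and one failed allocation, the combined opcode result resembles
#   {constants.JOB_IDS_KEY: [<submitted job ids>],
#    opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1", "inst2"],
#    opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst3"]}
# as assembled by _ConstructPartialResult plus the submitted job IDs.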
10828 def _CheckRADOSFreeSpace():
10829 """Compute disk size requirements inside the RADOS cluster.
10832 # For the RADOS cluster we assume there is always enough space.
10836 class LUInstanceConsole(NoHooksLU):
10837 """Connect to an instance's console.
10839 This is somewhat special in that it returns the command line that
10840 you need to run on the master node in order to connect to the
10846 def ExpandNames(self):
10847 self.share_locks = _ShareAll()
10848 self._ExpandAndLockInstance()
10850 def CheckPrereq(self):
10851 """Check prerequisites.
10853 This checks that the instance is in the cluster.
10856 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10857 assert self.instance is not None, \
10858 "Cannot retrieve locked instance %s" % self.op.instance_name
10859 _CheckNodeOnline(self, self.instance.primary_node)
10861 def Exec(self, feedback_fn):
10862 """Connect to the console of an instance
10865 instance = self.instance
10866 node = instance.primary_node
10868 node_insts = self.rpc.call_instance_list([node],
10869 [instance.hypervisor])[node]
10870 node_insts.Raise("Can't get node information from %s" % node)
10872 if instance.name not in node_insts.payload:
10873 if instance.admin_state == constants.ADMINST_UP:
10874 state = constants.INSTST_ERRORDOWN
10875 elif instance.admin_state == constants.ADMINST_DOWN:
10876 state = constants.INSTST_ADMINDOWN
10878 state = constants.INSTST_ADMINOFFLINE
10879 raise errors.OpExecError("Instance %s is not running (state %s)" %
10880 (instance.name, state))
10882 logging.debug("Connecting to console of %s on %s", instance.name, node)
10884 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10887 def _GetInstanceConsole(cluster, instance):
10888 """Returns console information for an instance.
10890 @type cluster: L{objects.Cluster}
10891 @type instance: L{objects.Instance}
10895 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10896 # beparams and hvparams are passed separately, to avoid editing the
10897 # instance and then saving the defaults in the instance itself.
10898 hvparams = cluster.FillHV(instance)
10899 beparams = cluster.FillBE(instance)
10900 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10902 assert console.instance == instance.name
10903 assert console.Validate()
10905 return console.ToDict()
10908 class LUInstanceReplaceDisks(LogicalUnit):
10909 """Replace the disks of an instance.
10912 HPATH = "mirrors-replace"
10913 HTYPE = constants.HTYPE_INSTANCE
10916 def CheckArguments(self):
10917 """Check arguments.
10920 remote_node = self.op.remote_node
10921 ialloc = self.op.iallocator
10922 if self.op.mode == constants.REPLACE_DISK_CHG:
10923 if remote_node is None and ialloc is None:
10924 raise errors.OpPrereqError("When changing the secondary either an"
10925 " iallocator script must be used or the"
10926 " new node given", errors.ECODE_INVAL)
10928 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10930 elif remote_node is not None or ialloc is not None:
10931 # Not replacing the secondary
10932 raise errors.OpPrereqError("The iallocator and new node options can"
10933 " only be used when changing the"
10934 " secondary node", errors.ECODE_INVAL)
10936 def ExpandNames(self):
10937 self._ExpandAndLockInstance()
10939 assert locking.LEVEL_NODE not in self.needed_locks
10940 assert locking.LEVEL_NODE_RES not in self.needed_locks
10941 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10943 assert self.op.iallocator is None or self.op.remote_node is None, \
10944 "Conflicting options"
10946 if self.op.remote_node is not None:
10947 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10949 # Warning: do not remove the locking of the new secondary here
10950 # unless DRBD8.AddChildren is changed to work in parallel;
10951 # currently it doesn't since parallel invocations of
10952 # FindUnusedMinor will conflict
10953 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10956 self.needed_locks[locking.LEVEL_NODE] = []
10957 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10959 if self.op.iallocator is not None:
10960 # iallocator will select a new node in the same group
10961 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10963 self.needed_locks[locking.LEVEL_NODE_RES] = []
10965 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10966 self.op.iallocator, self.op.remote_node,
10967 self.op.disks, self.op.early_release,
10968 self.op.ignore_ipolicy)
10970 self.tasklets = [self.replacer]
10972 def DeclareLocks(self, level):
10973 if level == locking.LEVEL_NODEGROUP:
10974 assert self.op.remote_node is None
10975 assert self.op.iallocator is not None
10976 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10978 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10979 # Lock all groups used by instance optimistically; this requires going
10980 # via the node before it's locked, requiring verification later on
10981 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10982 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10984 elif level == locking.LEVEL_NODE:
10985 if self.op.iallocator is not None:
10986 assert self.op.remote_node is None
10987 assert not self.needed_locks[locking.LEVEL_NODE]
10989 # Lock member nodes of all locked groups
10990 self.needed_locks[locking.LEVEL_NODE] = \
10992 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10993 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10995 self._LockInstancesNodes()
10996 elif level == locking.LEVEL_NODE_RES:
10998 self.needed_locks[locking.LEVEL_NODE_RES] = \
10999 self.needed_locks[locking.LEVEL_NODE]
11001 def BuildHooksEnv(self):
11002 """Build hooks env.
11004 This runs on the master, the primary and all the secondaries.
11007 instance = self.replacer.instance
11009 "MODE": self.op.mode,
11010 "NEW_SECONDARY": self.op.remote_node,
11011 "OLD_SECONDARY": instance.secondary_nodes[0],
11013 env.update(_BuildInstanceHookEnvByObject(self, instance))
11016 def BuildHooksNodes(self):
11017 """Build hooks nodes.
11020 instance = self.replacer.instance
11022 self.cfg.GetMasterNode(),
11023 instance.primary_node,
11025 if self.op.remote_node is not None:
11026 nl.append(self.op.remote_node)
11029 def CheckPrereq(self):
11030 """Check prerequisites.
11033 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11034 self.op.iallocator is None)
11036 # Verify if node group locks are still correct
11037 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11039 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11041 return LogicalUnit.CheckPrereq(self)
11044 class TLReplaceDisks(Tasklet):
11045 """Replaces disks for an instance.
11047 Note: Locking is not within the scope of this class.
11050 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11051 disks, early_release, ignore_ipolicy):
11052 """Initializes this class.
11055 Tasklet.__init__(self, lu)
11058 self.instance_name = instance_name
11060 self.iallocator_name = iallocator_name
11061 self.remote_node = remote_node
11063 self.early_release = early_release
11064 self.ignore_ipolicy = ignore_ipolicy
11067 self.instance = None
11068 self.new_node = None
11069 self.target_node = None
11070 self.other_node = None
11071 self.remote_node_info = None
11072 self.node_secondary_ip = None
11075 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11076 """Compute a new secondary node using an IAllocator.
11079 req = iallocator.IAReqRelocate(name=instance_name,
11080 relocate_from=list(relocate_from))
11081 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11083 ial.Run(iallocator_name)
11085 if not ial.success:
11086 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11087 " %s" % (iallocator_name, ial.info),
11088 errors.ECODE_NORES)
11090 remote_node_name = ial.result[0]
11092 lu.LogInfo("Selected new secondary for instance '%s': %s",
11093 instance_name, remote_node_name)
11095 return remote_node_name
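# Illustrative note (node name is hypothetical): for an IAReqRelocate request
# the iallocator is expected to return a single-element list in ial.result,
# e.g. ["node3.example.com"], which becomes the new secondary selected above.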
11097 def _FindFaultyDisks(self, node_name):
11098 """Wrapper for L{_FindFaultyInstanceDisks}.
11101 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11104 def _CheckDisksActivated(self, instance):
11105 """Checks if the instance disks are activated.
11107 @param instance: The instance to check disks
11108 @return: True if they are activated, False otherwise
11111 nodes = instance.all_nodes
11113 for idx, dev in enumerate(instance.disks):
11115 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11116 self.cfg.SetDiskID(dev, node)
11118 result = _BlockdevFind(self, node, dev, instance)
11122 elif result.fail_msg or not result.payload:
11127 def CheckPrereq(self):
11128 """Check prerequisites.
11130 This checks that the instance is in the cluster.
11133 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11134 assert instance is not None, \
11135 "Cannot retrieve locked instance %s" % self.instance_name
11137 if instance.disk_template != constants.DT_DRBD8:
11138 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11139 " instances", errors.ECODE_INVAL)
11141 if len(instance.secondary_nodes) != 1:
11142 raise errors.OpPrereqError("The instance has a strange layout,"
11143 " expected one secondary but found %d" %
11144 len(instance.secondary_nodes),
11145 errors.ECODE_FAULT)
11147 instance = self.instance
11148 secondary_node = instance.secondary_nodes[0]
11150 if self.iallocator_name is None:
11151 remote_node = self.remote_node
11153 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11154 instance.name, instance.secondary_nodes)
11156 if remote_node is None:
11157 self.remote_node_info = None
11159 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11160 "Remote node '%s' is not locked" % remote_node
11162 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11163 assert self.remote_node_info is not None, \
11164 "Cannot retrieve locked node %s" % remote_node
11166 if remote_node == self.instance.primary_node:
11167 raise errors.OpPrereqError("The specified node is the primary node of"
11168 " the instance", errors.ECODE_INVAL)
11170 if remote_node == secondary_node:
11171 raise errors.OpPrereqError("The specified node is already the"
11172 " secondary node of the instance",
11173 errors.ECODE_INVAL)
11175 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11176 constants.REPLACE_DISK_CHG):
11177 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11178 errors.ECODE_INVAL)
11180 if self.mode == constants.REPLACE_DISK_AUTO:
11181 if not self._CheckDisksActivated(instance):
11182 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11183 " first" % self.instance_name,
11184 errors.ECODE_STATE)
11185 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11186 faulty_secondary = self._FindFaultyDisks(secondary_node)
11188 if faulty_primary and faulty_secondary:
11189 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11190 " one node and can not be repaired"
11191 " automatically" % self.instance_name,
11192 errors.ECODE_STATE)
11195 self.disks = faulty_primary
11196 self.target_node = instance.primary_node
11197 self.other_node = secondary_node
11198 check_nodes = [self.target_node, self.other_node]
11199 elif faulty_secondary:
11200 self.disks = faulty_secondary
11201 self.target_node = secondary_node
11202 self.other_node = instance.primary_node
11203 check_nodes = [self.target_node, self.other_node]
11209 # Non-automatic modes
11210 if self.mode == constants.REPLACE_DISK_PRI:
11211 self.target_node = instance.primary_node
11212 self.other_node = secondary_node
11213 check_nodes = [self.target_node, self.other_node]
11215 elif self.mode == constants.REPLACE_DISK_SEC:
11216 self.target_node = secondary_node
11217 self.other_node = instance.primary_node
11218 check_nodes = [self.target_node, self.other_node]
11220 elif self.mode == constants.REPLACE_DISK_CHG:
11221 self.new_node = remote_node
11222 self.other_node = instance.primary_node
11223 self.target_node = secondary_node
11224 check_nodes = [self.new_node, self.other_node]
11226 _CheckNodeNotDrained(self.lu, remote_node)
11227 _CheckNodeVmCapable(self.lu, remote_node)
11229 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11230 assert old_node_info is not None
11231 if old_node_info.offline and not self.early_release:
11232 # doesn't make sense to delay the release
11233 self.early_release = True
11234 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11235 " early-release mode", secondary_node)
11238 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11241 # If not specified all disks should be replaced
11243 self.disks = range(len(self.instance.disks))
11245 # TODO: This is ugly, but right now we can't distinguish between internally
11246 # submitted opcodes and external ones. We should fix that.
11247 if self.remote_node_info:
11248 # We change the node, let's verify it still meets instance policy
11249 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11250 cluster = self.cfg.GetClusterInfo()
11251 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11253 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11254 ignore=self.ignore_ipolicy)
11256 for node in check_nodes:
11257 _CheckNodeOnline(self.lu, node)
11259 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11262 if node_name is not None)
11264 # Release unneeded node and node resource locks
11265 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11266 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11268 # Release any owned node group
11269 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11270 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11272 # Check whether disks are valid
11273 for disk_idx in self.disks:
11274 instance.FindDisk(disk_idx)
11276 # Get secondary node IP addresses
11277 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11278 in self.cfg.GetMultiNodeInfo(touched_nodes))
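# node_secondary_ip maps each touched node name to its secondary (replication)
# IP address, e.g. {"node1": "192.0.2.11", "node2": "192.0.2.12"} (addresses
# are purely illustrative); it is used later when disconnecting and
# reconnecting DRBD over the replication network.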
11280 def Exec(self, feedback_fn):
11281 """Execute disk replacement.
11283 This dispatches the disk replacement to the appropriate handler.
11287 # Verify owned locks before starting operation
11288 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11289 assert set(owned_nodes) == set(self.node_secondary_ip), \
11290 ("Incorrect node locks, owning %s, expected %s" %
11291 (owned_nodes, self.node_secondary_ip.keys()))
11292 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11293 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11295 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11296 assert list(owned_instances) == [self.instance_name], \
11297 "Instance '%s' not locked" % self.instance_name
11299 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11300 "Should not own any node group lock at this point"
11303 feedback_fn("No disks need replacement for instance '%s'" %
11304 self.instance.name)
11307 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11308 (utils.CommaJoin(self.disks), self.instance.name))
11309 feedback_fn("Current primary node: %s", self.instance.primary_node)
11310 feedback_fn("Current seconary node: %s",
11311 utils.CommaJoin(self.instance.secondary_nodes))
11313 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11315 # Activate the instance disks if we're replacing them on a down instance
11317 _StartInstanceDisks(self.lu, self.instance, True)
11320 # Should we replace the secondary node?
11321 if self.new_node is not None:
11322 fn = self._ExecDrbd8Secondary
11324 fn = self._ExecDrbd8DiskOnly
11326 result = fn(feedback_fn)
11328 # Deactivate the instance disks if we're replacing them on a
11331 _SafeShutdownInstanceDisks(self.lu, self.instance)
11333 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11336 # Verify owned locks
11337 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11338 nodes = frozenset(self.node_secondary_ip)
11339 assert ((self.early_release and not owned_nodes) or
11340 (not self.early_release and not (set(owned_nodes) - nodes))), \
11341 ("Not owning the correct locks, early_release=%s, owned=%r,"
11342 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11346 def _CheckVolumeGroup(self, nodes):
11347 self.lu.LogInfo("Checking volume groups")
11349 vgname = self.cfg.GetVGName()
11351 # Make sure volume group exists on all involved nodes
11352 results = self.rpc.call_vg_list(nodes)
11354 raise errors.OpExecError("Can't list volume groups on the nodes")
11357 res = results[node]
11358 res.Raise("Error checking node %s" % node)
11359 if vgname not in res.payload:
11360 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11363 def _CheckDisksExistence(self, nodes):
11364 # Check disk existence
11365 for idx, dev in enumerate(self.instance.disks):
11366 if idx not in self.disks:
11370 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11371 self.cfg.SetDiskID(dev, node)
11373 result = _BlockdevFind(self, node, dev, self.instance)
11375 msg = result.fail_msg
11376 if msg or not result.payload:
11378 msg = "disk not found"
11379 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11382 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11383 for idx, dev in enumerate(self.instance.disks):
11384 if idx not in self.disks:
11387 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11390 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11391 on_primary, ldisk=ldisk):
11392 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11393 " replace disks for instance %s" %
11394 (node_name, self.instance.name))
11396 def _CreateNewStorage(self, node_name):
11397 """Create new storage on the primary or secondary node.
11399 This is only used for same-node replaces, not for changing the
11400 secondary node, hence we don't want to modify the existing disk.
11405 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11406 for idx, dev in enumerate(disks):
11407 if idx not in self.disks:
11410 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11412 self.cfg.SetDiskID(dev, node_name)
11414 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11415 names = _GenerateUniqueNames(self.lu, lv_names)
11417 (data_disk, meta_disk) = dev.children
11418 vg_data = data_disk.logical_id[0]
11419 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11420 logical_id=(vg_data, names[0]),
11421 params=data_disk.params)
11422 vg_meta = meta_disk.logical_id[0]
11423 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11424 size=constants.DRBD_META_SIZE,
11425 logical_id=(vg_meta, names[1]),
11426 params=meta_disk.params)
11428 new_lvs = [lv_data, lv_meta]
11429 old_lvs = [child.Copy() for child in dev.children]
11430 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
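# iv_names maps a DRBD device's iv_name (e.g. "disk/0") to a tuple of
# (drbd device object, old LV children, newly created LVs); the mapping is
# consumed later by _CheckDevices and _RemoveOldStorage.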
11432 # we pass force_create=True to force the LVM creation
11433 for new_lv in new_lvs:
11434 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11435 _GetInstanceInfoText(self.instance), False)
11439 def _CheckDevices(self, node_name, iv_names):
11440 for name, (dev, _, _) in iv_names.iteritems():
11441 self.cfg.SetDiskID(dev, node_name)
11443 result = _BlockdevFind(self, node_name, dev, self.instance)
11445 msg = result.fail_msg
11446 if msg or not result.payload:
11448 msg = "disk not found"
11449 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11452 if result.payload.is_degraded:
11453 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11455 def _RemoveOldStorage(self, node_name, iv_names):
11456 for name, (_, old_lvs, _) in iv_names.iteritems():
11457 self.lu.LogInfo("Remove logical volumes for %s", name)
11460 self.cfg.SetDiskID(lv, node_name)
11462 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11464 self.lu.LogWarning("Can't remove old LV: %s", msg,
11465 hint="remove unused LVs manually")
11467 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11468 """Replace a disk on the primary or secondary for DRBD 8.
11470 The algorithm for replace is quite complicated:
11472 1. for each disk to be replaced:
11474 1. create new LVs on the target node with unique names
11475 1. detach old LVs from the drbd device
11476 1. rename old LVs to name_replaced.<time_t>
11477 1. rename new LVs to old LVs
11478 1. attach the new LVs (with the old names now) to the drbd device
11480 1. wait for sync across all devices
11482 1. for each modified disk:
11484 1. remove old LVs (which have the name name_replaced.<time_t>)
11486 Failures are not very well handled.
11491 # Step: check device activation
11492 self.lu.LogStep(1, steps_total, "Check device existence")
11493 self._CheckDisksExistence([self.other_node, self.target_node])
11494 self._CheckVolumeGroup([self.target_node, self.other_node])
11496 # Step: check other node consistency
11497 self.lu.LogStep(2, steps_total, "Check peer consistency")
11498 self._CheckDisksConsistency(self.other_node,
11499 self.other_node == self.instance.primary_node,
11502 # Step: create new storage
11503 self.lu.LogStep(3, steps_total, "Allocate new storage")
11504 iv_names = self._CreateNewStorage(self.target_node)
11506 # Step: for each lv, detach+rename*2+attach
11507 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11508 for dev, old_lvs, new_lvs in iv_names.itervalues():
11509 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11511 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11513 result.Raise("Can't detach drbd from local storage on node"
11514 " %s for device %s" % (self.target_node, dev.iv_name))
11516 #cfg.Update(instance)
11518 # ok, we created the new LVs, so now we know we have the needed
11519 # storage; as such, we proceed on the target node to rename
11520 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11521 # using the assumption that logical_id == physical_id (which in
11522 # turn is the unique_id on that node)
11524 # FIXME(iustin): use a better name for the replaced LVs
11525 temp_suffix = int(time.time())
11526 ren_fn = lambda d, suff: (d.physical_id[0],
11527 d.physical_id[1] + "_replaced-%s" % suff)
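# Illustrative example (volume names are hypothetical): an old LV with
# physical_id ("xenvg", "0f3a-disk0_data") would be renamed to
# ("xenvg", "0f3a-disk0_data_replaced-1431700000").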
11529 # Build the rename list based on what LVs exist on the node
11530 rename_old_to_new = []
11531 for to_ren in old_lvs:
11532 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11533 if not result.fail_msg and result.payload:
11535 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11537 self.lu.LogInfo("Renaming the old LVs on the target node")
11538 result = self.rpc.call_blockdev_rename(self.target_node,
11540 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11542 # Now we rename the new LVs to the old LVs
11543 self.lu.LogInfo("Renaming the new LVs on the target node")
11544 rename_new_to_old = [(new, old.physical_id)
11545 for old, new in zip(old_lvs, new_lvs)]
11546 result = self.rpc.call_blockdev_rename(self.target_node,
11548 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11550 # Intermediate steps of in-memory modifications
11551 for old, new in zip(old_lvs, new_lvs):
11552 new.logical_id = old.logical_id
11553 self.cfg.SetDiskID(new, self.target_node)
11555 # We need to modify old_lvs so that removal later removes the
11556 # right LVs, not the newly added ones; note that old_lvs is a
11558 for disk in old_lvs:
11559 disk.logical_id = ren_fn(disk, temp_suffix)
11560 self.cfg.SetDiskID(disk, self.target_node)
11562 # Now that the new lvs have the old name, we can add them to the device
11563 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11564 result = self.rpc.call_blockdev_addchildren(self.target_node,
11565 (dev, self.instance), new_lvs)
11566 msg = result.fail_msg
11568 for new_lv in new_lvs:
11569 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11572 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11573 hint=("cleanup manually the unused logical"
11575 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11577 cstep = itertools.count(5)
11579 if self.early_release:
11580 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11581 self._RemoveOldStorage(self.target_node, iv_names)
11582 # TODO: Check if releasing locks early still makes sense
11583 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11585 # Release all resource locks except those used by the instance
11586 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11587 keep=self.node_secondary_ip.keys())
11589 # Release all node locks while waiting for sync
11590 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11592 # TODO: Can the instance lock be downgraded here? Take the optional disk
11593 # shutdown in the caller into consideration.
11596 # This can fail as the old devices are degraded and _WaitForSync
11597 # does a combined result over all disks, so we don't check its return value
11598 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11599 _WaitForSync(self.lu, self.instance)
11601 # Check all devices manually
11602 self._CheckDevices(self.instance.primary_node, iv_names)
11604 # Step: remove old storage
11605 if not self.early_release:
11606 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11607 self._RemoveOldStorage(self.target_node, iv_names)
11609 def _ExecDrbd8Secondary(self, feedback_fn):
11610 """Replace the secondary node for DRBD 8.
11612 The algorithm for replace is quite complicated:
11613 - for all disks of the instance:
11614 - create new LVs on the new node with same names
11615 - shutdown the drbd device on the old secondary
11616 - disconnect the drbd network on the primary
11617 - create the drbd device on the new secondary
11618 - network attach the drbd on the primary, using an artifice:
11619 the drbd code for Attach() will connect to the network if it
11620 finds a device which is connected to the good local disks but
11621 not network enabled
11622 - wait for sync across all devices
11623 - remove all disks from the old secondary
11625 Failures are not very well handled.
11630 pnode = self.instance.primary_node
11632 # Step: check device activation
11633 self.lu.LogStep(1, steps_total, "Check device existence")
11634 self._CheckDisksExistence([self.instance.primary_node])
11635 self._CheckVolumeGroup([self.instance.primary_node])
11637 # Step: check other node consistency
11638 self.lu.LogStep(2, steps_total, "Check peer consistency")
11639 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11641 # Step: create new storage
11642 self.lu.LogStep(3, steps_total, "Allocate new storage")
11643 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11644 for idx, dev in enumerate(disks):
11645 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11646 (self.new_node, idx))
11647 # we pass force_create=True to force LVM creation
11648 for new_lv in dev.children:
11649 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11650 True, _GetInstanceInfoText(self.instance), False)
11652 # Step 4: drbd minors and drbd setup changes
11653 # after this, we must manually remove the drbd minors on both the
11654 # error and the success paths
11655 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11656 minors = self.cfg.AllocateDRBDMinor([self.new_node
11657 for dev in self.instance.disks],
11658 self.instance.name)
11659 logging.debug("Allocated minors %r", minors)
11662 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11663 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11664 (self.new_node, idx))
11665 # create new devices on new_node; note that we create two IDs:
11666 # one without port, so the drbd will be activated without
11667 # networking information on the new node at this stage, and one
11668 # with network, for the later activation in step 4
11669 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11670 if self.instance.primary_node == o_node1:
11673 assert self.instance.primary_node == o_node2, "Three-node instance?"
11676 new_alone_id = (self.instance.primary_node, self.new_node, None,
11677 p_minor, new_minor, o_secret)
11678 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11679 p_minor, new_minor, o_secret)
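# A DRBD8 logical_id is the 6-tuple (node_a, node_b, port, minor_a, minor_b,
# secret); new_alone_id leaves the port out (None) so the device can first be
# brought up on the new node without networking, while new_net_id carries the
# original port for the later network attach.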
11681 iv_names[idx] = (dev, dev.children, new_net_id)
11682 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11684 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11685 logical_id=new_alone_id,
11686 children=dev.children,
11689 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11692 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11694 _GetInstanceInfoText(self.instance), False)
11695 except errors.GenericError:
11696 self.cfg.ReleaseDRBDMinors(self.instance.name)
11699 # We have new devices, shutdown the drbd on the old secondary
11700 for idx, dev in enumerate(self.instance.disks):
11701 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11702 self.cfg.SetDiskID(dev, self.target_node)
11703 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11704 (dev, self.instance)).fail_msg
11706 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11707 "node: %s" % (idx, msg),
11708 hint=("Please cleanup this device manually as"
11709 " soon as possible"))
11711 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11712 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11713 self.instance.disks)[pnode]
11715 msg = result.fail_msg
11717 # detaches didn't succeed (unlikely)
11718 self.cfg.ReleaseDRBDMinors(self.instance.name)
11719 raise errors.OpExecError("Can't detach the disks from the network on"
11720 " old node: %s" % (msg,))
11722 # if we managed to detach at least one, we update all the disks of
11723 # the instance to point to the new secondary
11724 self.lu.LogInfo("Updating instance configuration")
11725 for dev, _, new_logical_id in iv_names.itervalues():
11726 dev.logical_id = new_logical_id
11727 self.cfg.SetDiskID(dev, self.instance.primary_node)
11729 self.cfg.Update(self.instance, feedback_fn)
11731 # Release all node locks (the configuration has been updated)
11732 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11734 # and now perform the drbd attach
11735 self.lu.LogInfo("Attaching primary drbds to new secondary"
11736 " (standalone => connected)")
11737 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11739 self.node_secondary_ip,
11740 (self.instance.disks, self.instance),
11741 self.instance.name,
11743 for to_node, to_result in result.items():
11744 msg = to_result.fail_msg
11746 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11748 hint=("please do a gnt-instance info to see the"
11749 " status of disks"))
11751 cstep = itertools.count(5)
11753 if self.early_release:
11754 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11755 self._RemoveOldStorage(self.target_node, iv_names)
11756 # TODO: Check if releasing locks early still makes sense
11757 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11759 # Release all resource locks except those used by the instance
11760 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11761 keep=self.node_secondary_ip.keys())
11763 # TODO: Can the instance lock be downgraded here? Take the optional disk
11764 # shutdown in the caller into consideration.
11767 # This can fail as the old devices are degraded and _WaitForSync
11768 # does a combined result over all disks, so we don't check its return value
11769 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11770 _WaitForSync(self.lu, self.instance)
11772 # Check all devices manually
11773 self._CheckDevices(self.instance.primary_node, iv_names)
11775 # Step: remove old storage
11776 if not self.early_release:
11777 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11778 self._RemoveOldStorage(self.target_node, iv_names)
11781 class LURepairNodeStorage(NoHooksLU):
11782 """Repairs the volume group on a node.
11787 def CheckArguments(self):
11788 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11790 storage_type = self.op.storage_type
11792 if (constants.SO_FIX_CONSISTENCY not in
11793 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11794 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11795 " repaired" % storage_type,
11796 errors.ECODE_INVAL)
11798 def ExpandNames(self):
11799 self.needed_locks = {
11800 locking.LEVEL_NODE: [self.op.node_name],
11803 def _CheckFaultyDisks(self, instance, node_name):
11804 """Ensure faulty disks abort the opcode or at least warn."""
11806 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11808 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11809 " node '%s'" % (instance.name, node_name),
11810 errors.ECODE_STATE)
11811 except errors.OpPrereqError, err:
11812 if self.op.ignore_consistency:
11813 self.LogWarning(str(err.args[0]))
11817 def CheckPrereq(self):
11818 """Check prerequisites.
11821 # Check whether any instance on this node has faulty disks
11822 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11823 if inst.admin_state != constants.ADMINST_UP:
11825 check_nodes = set(inst.all_nodes)
11826 check_nodes.discard(self.op.node_name)
11827 for inst_node_name in check_nodes:
11828 self._CheckFaultyDisks(inst, inst_node_name)
11830 def Exec(self, feedback_fn):
11831 feedback_fn("Repairing storage unit '%s' on %s ..." %
11832 (self.op.name, self.op.node_name))
11834 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11835 result = self.rpc.call_storage_execute(self.op.node_name,
11836 self.op.storage_type, st_args,
11838 constants.SO_FIX_CONSISTENCY)
11839 result.Raise("Failed to repair storage unit '%s' on %s" %
11840 (self.op.name, self.op.node_name))
11843 class LUNodeEvacuate(NoHooksLU):
11844 """Evacuates instances off a list of nodes.
11849 _MODE2IALLOCATOR = {
11850 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11851 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11852 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11854 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11855 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11856 constants.IALLOCATOR_NEVAC_MODES)
11858 def CheckArguments(self):
11859 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11861 def ExpandNames(self):
11862 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11864 if self.op.remote_node is not None:
11865 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11866 assert self.op.remote_node
11868 if self.op.remote_node == self.op.node_name:
11869 raise errors.OpPrereqError("Can not use evacuated node as a new"
11870 " secondary node", errors.ECODE_INVAL)
11872 if self.op.mode != constants.NODE_EVAC_SEC:
11873 raise errors.OpPrereqError("Without the use of an iallocator only"
11874 " secondary instances can be evacuated",
11875 errors.ECODE_INVAL)
11878 self.share_locks = _ShareAll()
11879 self.needed_locks = {
11880 locking.LEVEL_INSTANCE: [],
11881 locking.LEVEL_NODEGROUP: [],
11882 locking.LEVEL_NODE: [],
11885 # Determine nodes (via group) optimistically, needs verification once locks
11886 # have been acquired
11887 self.lock_nodes = self._DetermineNodes()
11889 def _DetermineNodes(self):
11890 """Gets the list of nodes to operate on.
11893 if self.op.remote_node is None:
11894 # Iallocator will choose any node(s) in the same group
11895 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11897 group_nodes = frozenset([self.op.remote_node])
11899 # Determine nodes to be locked
11900 return set([self.op.node_name]) | group_nodes
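# Illustrative example (node names hypothetical): evacuating "node1" whose
# group also contains "node2" and "node3" yields {"node1", "node2", "node3"};
# with an explicit remote node only that node and "node1" are returned.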
11902 def _DetermineInstances(self):
11903 """Builds list of instances to operate on.
11906 assert self.op.mode in constants.NODE_EVAC_MODES
11908 if self.op.mode == constants.NODE_EVAC_PRI:
11909 # Primary instances only
11910 inst_fn = _GetNodePrimaryInstances
11911 assert self.op.remote_node is None, \
11912 "Evacuating primary instances requires iallocator"
11913 elif self.op.mode == constants.NODE_EVAC_SEC:
11914 # Secondary instances only
11915 inst_fn = _GetNodeSecondaryInstances
11918 assert self.op.mode == constants.NODE_EVAC_ALL
11919 inst_fn = _GetNodeInstances
11920 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11922 raise errors.OpPrereqError("Due to an issue with the iallocator"
11923 " interface it is not possible to evacuate"
11924 " all instances at once; specify explicitly"
11925 " whether to evacuate primary or secondary"
11927 errors.ECODE_INVAL)
11929 return inst_fn(self.cfg, self.op.node_name)
11931 def DeclareLocks(self, level):
11932 if level == locking.LEVEL_INSTANCE:
11933 # Lock instances optimistically, needs verification once node and group
11934 # locks have been acquired
11935 self.needed_locks[locking.LEVEL_INSTANCE] = \
11936 set(i.name for i in self._DetermineInstances())
11938 elif level == locking.LEVEL_NODEGROUP:
11939 # Lock node groups for all potential target nodes optimistically, needs
11940 # verification once nodes have been acquired
11941 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11942 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11944 elif level == locking.LEVEL_NODE:
11945 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11947 def CheckPrereq(self):
11949 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11950 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11951 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11953 need_nodes = self._DetermineNodes()
11955 if not owned_nodes.issuperset(need_nodes):
11956 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11957 " locks were acquired, current nodes are"
11958 " are '%s', used to be '%s'; retry the"
11960 (self.op.node_name,
11961 utils.CommaJoin(need_nodes),
11962 utils.CommaJoin(owned_nodes)),
11963 errors.ECODE_STATE)
11965 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11966 if owned_groups != wanted_groups:
11967 raise errors.OpExecError("Node groups changed since locks were acquired,"
11968 " current groups are '%s', used to be '%s';"
11969 " retry the operation" %
11970 (utils.CommaJoin(wanted_groups),
11971 utils.CommaJoin(owned_groups)))
11973 # Determine affected instances
11974 self.instances = self._DetermineInstances()
11975 self.instance_names = [i.name for i in self.instances]
11977 if set(self.instance_names) != owned_instances:
11978 raise errors.OpExecError("Instances on node '%s' changed since locks"
11979 " were acquired, current instances are '%s',"
11980 " used to be '%s'; retry the operation" %
11981 (self.op.node_name,
11982 utils.CommaJoin(self.instance_names),
11983 utils.CommaJoin(owned_instances)))
11985 if self.instance_names:
11986 self.LogInfo("Evacuating instances from node '%s': %s",
11988 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11990 self.LogInfo("No instances to evacuate from node '%s'",
11993 if self.op.remote_node is not None:
11994 for i in self.instances:
11995 if i.primary_node == self.op.remote_node:
11996 raise errors.OpPrereqError("Node %s is the primary node of"
11997 " instance %s, cannot use it as"
11999 (self.op.remote_node, i.name),
12000 errors.ECODE_INVAL)
12002 def Exec(self, feedback_fn):
12003 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12005 if not self.instance_names:
12006 # No instances to evacuate
12009 elif self.op.iallocator is not None:
12010 # TODO: Implement relocation to other group
12011 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12012 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12013 instances=list(self.instance_names))
12014 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12016 ial.Run(self.op.iallocator)
12018 if not ial.success:
12019 raise errors.OpPrereqError("Can't compute node evacuation using"
12020 " iallocator '%s': %s" %
12021 (self.op.iallocator, ial.info),
12022 errors.ECODE_NORES)
12024 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12026 elif self.op.remote_node is not None:
12027 assert self.op.mode == constants.NODE_EVAC_SEC
12029 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12030 remote_node=self.op.remote_node,
12032 mode=constants.REPLACE_DISK_CHG,
12033 early_release=self.op.early_release)]
12034 for instance_name in self.instance_names]
12037 raise errors.ProgrammerError("No iallocator or remote node")
12039 return ResultWithJobs(jobs)
12042 def _SetOpEarlyRelease(early_release, op):
12043 """Sets C{early_release} flag on opcodes if available.
12047 op.early_release = early_release
12048 except AttributeError:
12049 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12054 def _NodeEvacDest(use_nodes, group, nodes):
12055 """Returns group or nodes depending on caller's choice.
12059 return utils.CommaJoin(nodes)
12064 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12065 """Unpacks the result of change-group and node-evacuate iallocator requests.
12067 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12068 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12070 @type lu: L{LogicalUnit}
12071 @param lu: Logical unit instance
12072 @type alloc_result: tuple/list
12073 @param alloc_result: Result from iallocator
12074 @type early_release: bool
12075 @param early_release: Whether to release locks early if possible
12076 @type use_nodes: bool
12077 @param use_nodes: Whether to display node names instead of groups
12080 (moved, failed, jobs) = alloc_result
12083 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12084 for (name, reason) in failed)
12085 lu.LogWarning("Unable to evacuate instances %s", failreason)
12086 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12089 lu.LogInfo("Instances to be moved: %s",
12090 utils.CommaJoin("%s (to %s)" %
12091 (name, _NodeEvacDest(use_nodes, group, nodes))
12092 for (name, group, nodes) in moved))
12094 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12095 map(opcodes.OpCode.LoadOpCode, ops))
12099 def _DiskSizeInBytesToMebibytes(lu, size):
12100 """Converts a disk size in bytes to mebibytes.
12102 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12105 (mib, remainder) = divmod(size, 1024 * 1024)
12108 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12109 " to not overwrite existing data (%s bytes will not be"
12110 " wiped)", (1024 * 1024) - remainder)
12116 class LUInstanceGrowDisk(LogicalUnit):
12117 """Grow a disk of an instance.
12120 HPATH = "disk-grow"
12121 HTYPE = constants.HTYPE_INSTANCE
12124 def ExpandNames(self):
12125 self._ExpandAndLockInstance()
12126 self.needed_locks[locking.LEVEL_NODE] = []
12127 self.needed_locks[locking.LEVEL_NODE_RES] = []
12128 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12129 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12131 def DeclareLocks(self, level):
12132 if level == locking.LEVEL_NODE:
12133 self._LockInstancesNodes()
12134 elif level == locking.LEVEL_NODE_RES:
12136 self.needed_locks[locking.LEVEL_NODE_RES] = \
12137 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12139 def BuildHooksEnv(self):
12140 """Build hooks env.
12142 This runs on the master, the primary and all the secondaries.
12146 "DISK": self.op.disk,
12147 "AMOUNT": self.op.amount,
12148 "ABSOLUTE": self.op.absolute,
12150 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12153 def BuildHooksNodes(self):
12154 """Build hooks nodes.
12157 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12160 def CheckPrereq(self):
12161 """Check prerequisites.
12163 This checks that the instance is in the cluster.
12166 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12167 assert instance is not None, \
12168 "Cannot retrieve locked instance %s" % self.op.instance_name
12169 nodenames = list(instance.all_nodes)
12170 for node in nodenames:
12171 _CheckNodeOnline(self, node)
12173 self.instance = instance
12175 if instance.disk_template not in constants.DTS_GROWABLE:
12176 raise errors.OpPrereqError("Instance's disk layout does not support"
12177 " growing", errors.ECODE_INVAL)
12179 self.disk = instance.FindDisk(self.op.disk)
12181 if self.op.absolute:
12182 self.target = self.op.amount
12183 self.delta = self.target - self.disk.size
12185 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12186 "current disk size (%s)" %
12187 (utils.FormatUnit(self.target, "h"),
12188 utils.FormatUnit(self.disk.size, "h")),
12189 errors.ECODE_STATE)
12191 self.delta = self.op.amount
12192 self.target = self.disk.size + self.delta
12194 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12195 utils.FormatUnit(self.delta, "h"),
12196 errors.ECODE_INVAL)
12198 if instance.disk_template not in (constants.DT_FILE,
12199 constants.DT_SHARED_FILE,
12201 # TODO: check the free disk space for file, when that feature will be
12203 _CheckNodesFreeDiskPerVG(self, nodenames,
12204 self.disk.ComputeGrowth(self.delta))
12206 def Exec(self, feedback_fn):
12207 """Execute disk grow.
12210 instance = self.instance
12213 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12214 assert (self.owned_locks(locking.LEVEL_NODE) ==
12215 self.owned_locks(locking.LEVEL_NODE_RES))
12217 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12219 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12221 raise errors.OpExecError("Cannot activate block device to grow")
12223 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12224 (self.op.disk, instance.name,
12225 utils.FormatUnit(self.delta, "h"),
12226 utils.FormatUnit(self.target, "h")))
12228 # First run all grow ops in dry-run mode
12229 for node in instance.all_nodes:
12230 self.cfg.SetDiskID(disk, node)
12231 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12233 result.Raise("Dry-run grow request failed to node %s" % node)
12236 # Get disk size from primary node for wiping
12237 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12238 result.Raise("Failed to retrieve disk size from node '%s'" %
12239 instance.primary_node)
12241 (disk_size_in_bytes, ) = result.payload
12243 if disk_size_in_bytes is None:
12244 raise errors.OpExecError("Failed to retrieve disk size from primary"
12245 " node '%s'" % instance.primary_node)
12247 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12249 assert old_disk_size >= disk.size, \
12250 ("Retrieved disk size too small (got %s, should be at least %s)" %
12251 (old_disk_size, disk.size))
12253 old_disk_size = None
12255 # We know that (as far as we can test) operations across different
12256 # nodes will succeed, time to run it for real on the backing storage
12257 for node in instance.all_nodes:
12258 self.cfg.SetDiskID(disk, node)
12259 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12261 result.Raise("Grow request failed to node %s" % node)
12263 # And now execute it for logical storage, on the primary node
12264 node = instance.primary_node
12265 self.cfg.SetDiskID(disk, node)
12266 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12268 result.Raise("Grow request failed to node %s" % node)
12270 disk.RecordGrow(self.delta)
12271 self.cfg.Update(instance, feedback_fn)
12273 # Changes have been recorded, release node lock
12274 _ReleaseLocks(self, locking.LEVEL_NODE)
12276 # Downgrade lock while waiting for sync
12277 self.glm.downgrade(locking.LEVEL_INSTANCE)
12279 assert wipe_disks ^ (old_disk_size is None)
12282 assert instance.disks[self.op.disk] == disk
12284 # Wipe newly added disk space
12285 _WipeDisks(self, instance,
12286 disks=[(self.op.disk, disk, old_disk_size)])
12288 if self.op.wait_for_sync:
12289 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12291 self.LogWarning("Disk syncing has not returned a good status; check"
12293 if instance.admin_state != constants.ADMINST_UP:
12294 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12295 elif instance.admin_state != constants.ADMINST_UP:
12296 self.LogWarning("Not shutting down the disk even if the instance is"
12297 " not supposed to be running because no wait for"
12298 " sync mode was requested")
12300 assert self.owned_locks(locking.LEVEL_NODE_RES)
12301 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12304 class LUInstanceQueryData(NoHooksLU):
12305 """Query runtime instance data.
12310 def ExpandNames(self):
12311 self.needed_locks = {}
12313 # Use locking if requested or when non-static information is wanted
12314 if not (self.op.static or self.op.use_locking):
12315 self.LogWarning("Non-static data requested, locks need to be acquired")
12316 self.op.use_locking = True
12318 if self.op.instances or not self.op.use_locking:
12319 # Expand instance names right here
12320 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12322 # Will use acquired locks
12323 self.wanted_names = None
12325 if self.op.use_locking:
12326 self.share_locks = _ShareAll()
12328 if self.wanted_names is None:
12329 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12331 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12333 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12334 self.needed_locks[locking.LEVEL_NODE] = []
12335 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12337 def DeclareLocks(self, level):
12338 if self.op.use_locking:
12339 if level == locking.LEVEL_NODEGROUP:
12340 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12342 # Lock all groups used by instances optimistically; this requires going
12343 # via the node before it's locked, requiring verification later on
12344 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12345 frozenset(group_uuid
12346 for instance_name in owned_instances
12348 self.cfg.GetInstanceNodeGroups(instance_name))
12350 elif level == locking.LEVEL_NODE:
12351 self._LockInstancesNodes()
12353 def CheckPrereq(self):
12354 """Check prerequisites.
12356 This only checks the optional instance list against the existing names.
12359 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12360 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12361 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12363 if self.wanted_names is None:
12364 assert self.op.use_locking, "Locking was not used"
12365 self.wanted_names = owned_instances
12367 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12369 if self.op.use_locking:
12370 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12373 assert not (owned_instances or owned_groups or owned_nodes)
12375 self.wanted_instances = instances.values()
12377 def _ComputeBlockdevStatus(self, node, instance, dev):
12378 """Returns the status of a block device
12381 if self.op.static or not node:
12384 self.cfg.SetDiskID(dev, node)
12386 result = self.rpc.call_blockdev_find(node, dev)
12390 result.Raise("Can't compute disk status for %s" % instance.name)
12392 status = result.payload
12396 return (status.dev_path, status.major, status.minor,
12397 status.sync_percent, status.estimated_time,
12398 status.is_degraded, status.ldisk_status)
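# The returned tuple is (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status); e.g. ("/dev/drbd0", 147, 0, 90.5, 120, False,
# <ldisk status constant>), where the values are purely illustrative.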
12400 def _ComputeDiskStatus(self, instance, snode, dev):
12401 """Compute block device status.
12404 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12406 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12408 def _ComputeDiskStatusInner(self, instance, snode, dev):
12409 """Compute block device status.
12411 @attention: The device has to be annotated already.
12414 if dev.dev_type in constants.LDS_DRBD:
12415 # we change the snode then (otherwise we use the one passed in)
12416 if dev.logical_id[0] == instance.primary_node:
12417 snode = dev.logical_id[1]
12419 snode = dev.logical_id[0]
12421 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12423 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12426 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12433 "iv_name": dev.iv_name,
12434 "dev_type": dev.dev_type,
12435 "logical_id": dev.logical_id,
12436 "physical_id": dev.physical_id,
12437 "pstatus": dev_pstatus,
12438 "sstatus": dev_sstatus,
12439 "children": dev_children,
12444 def Exec(self, feedback_fn):
12445 """Gather and return data"""
12448 cluster = self.cfg.GetClusterInfo()
12450 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12451 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12453 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12454 for node in nodes.values()))
12456 group2name_fn = lambda uuid: groups[uuid].name
12458 for instance in self.wanted_instances:
12459 pnode = nodes[instance.primary_node]
12461 if self.op.static or pnode.offline:
12462 remote_state = None
12464 self.LogWarning("Primary node %s is marked offline, returning static"
12465 " information only for instance %s" %
12466 (pnode.name, instance.name))
12468 remote_info = self.rpc.call_instance_info(instance.primary_node,
12470 instance.hypervisor)
12471 remote_info.Raise("Error checking node %s" % instance.primary_node)
12472 remote_info = remote_info.payload
12473 if remote_info and "state" in remote_info:
12474 remote_state = "up"
12476 if instance.admin_state == constants.ADMINST_UP:
12477 remote_state = "down"
12479 remote_state = instance.admin_state
12481 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12484 snodes_group_uuids = [nodes[snode_name].group
12485 for snode_name in instance.secondary_nodes]
12487 result[instance.name] = {
12488 "name": instance.name,
12489 "config_state": instance.admin_state,
12490 "run_state": remote_state,
12491 "pnode": instance.primary_node,
12492 "pnode_group_uuid": pnode.group,
12493 "pnode_group_name": group2name_fn(pnode.group),
12494 "snodes": instance.secondary_nodes,
12495 "snodes_group_uuids": snodes_group_uuids,
12496 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12498 # this happens to be the same format used for hooks
12499 "nics": _NICListToTuple(self, instance.nics),
12500 "disk_template": instance.disk_template,
12502 "hypervisor": instance.hypervisor,
12503 "network_port": instance.network_port,
12504 "hv_instance": instance.hvparams,
12505 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12506 "be_instance": instance.beparams,
12507 "be_actual": cluster.FillBE(instance),
12508 "os_instance": instance.osparams,
12509 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12510 "serial_no": instance.serial_no,
12511 "mtime": instance.mtime,
12512 "ctime": instance.ctime,
12513 "uuid": instance.uuid,
12519 def PrepareContainerMods(mods, private_fn):
12520 """Prepares a list of container modifications by adding a private data field.
12522 @type mods: list of tuples; (operation, index, parameters)
12523 @param mods: List of modifications
12524 @type private_fn: callable or None
12525 @param private_fn: Callable for constructing a private data field for a
12530 if private_fn is None:
12535 return [(op, idx, params, fn()) for (op, idx, params) in mods]
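# Illustrative example: PrepareContainerMods([(constants.DDM_ADD, -1, params)],
# _InstNicModPrivate) yields [(constants.DDM_ADD, -1, params, <private obj>)]
# with a fresh private object per modification; with private_fn=None the
# fourth element is simply None.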
12538 #: Type description for changes as returned by L{ApplyContainerMods}'s
12540 _TApplyContModsCbChanges = \
12541 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12542 ht.TNonEmptyString,
12547 def ApplyContainerMods(kind, container, chgdesc, mods,
12548 create_fn, modify_fn, remove_fn):
12549 """Applies descriptions in C{mods} to C{container}.
12552 @param kind: One-word item description
12553 @type container: list
12554 @param container: Container to modify
12555 @type chgdesc: None or list
12556 @param chgdesc: List of applied changes
12558 @param mods: Modifications as returned by L{PrepareContainerMods}
12559 @type create_fn: callable
12560 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12561 receives absolute item index, parameters and private data object as added
12562 by L{PrepareContainerMods}, returns tuple containing new item and changes
12564 @type modify_fn: callable
12565 @param modify_fn: Callback for modifying an existing item
12566 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12567 and private data object as added by L{PrepareContainerMods}, returns
12569 @type remove_fn: callable
12570 @param remove_fn: Callback on removing item; receives absolute item index,
12571 item and private data object as added by L{PrepareContainerMods}
12574 for (op, idx, params, private) in mods:
12577 absidx = len(container) - 1
12579 raise IndexError("Not accepting negative indices other than -1")
12580 elif idx > len(container):
12581 raise IndexError("Got %s index %s, but there are only %s" %
12582 (kind, idx, len(container)))
12588 if op == constants.DDM_ADD:
12589 # Calculate where item will be added
12591 addidx = len(container)
12595 if create_fn is None:
12598 (item, changes) = create_fn(addidx, params, private)
12601 container.append(item)
12604 assert idx <= len(container)
12605 # list.insert does so before the specified index
12606 container.insert(idx, item)
12608 # Retrieve existing item
12610 item = container[absidx]
12612 raise IndexError("Invalid %s index %s" % (kind, idx))
12614 if op == constants.DDM_REMOVE:
12617 if remove_fn is not None:
12618 remove_fn(absidx, item, private)
12620 changes = [("%s/%s" % (kind, absidx), "remove")]
12622 assert container[absidx] == item
12623 del container[absidx]
12624 elif op == constants.DDM_MODIFY:
12625 if modify_fn is not None:
12626 changes = modify_fn(absidx, item, params, private)
12628 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12630 assert _TApplyContModsCbChanges(changes)
12632 if not (chgdesc is None or changes is None):
12633 chgdesc.extend(changes)
12636 def _UpdateIvNames(base_index, disks):
12637 """Updates the C{iv_name} attribute of disks.
12639 @type disks: list of L{objects.Disk}
12642 for (idx, disk) in enumerate(disks):
12643 disk.iv_name = "disk/%s" % (base_index + idx, )
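# For example, _UpdateIvNames(2, disks) relabels the given disks as "disk/2",
# "disk/3", ... in order.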
12646 class _InstNicModPrivate:
12647 """Data structure for network interface modifications.
12649 Used by L{LUInstanceSetParams}.
12652 def __init__(self):
12657 class LUInstanceSetParams(LogicalUnit):
12658 """Modifies an instances's parameters.
12661 HPATH = "instance-modify"
12662 HTYPE = constants.HTYPE_INSTANCE
12666 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12667 assert ht.TList(mods)
12668 assert not mods or len(mods[0]) in (2, 3)
12670 if mods and len(mods[0]) == 2:
12674 for op, params in mods:
12675 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12676 result.append((op, -1, params))
12680 raise errors.OpPrereqError("Only one %s add or remove operation is"
12681 " supported at a time" % kind,
12682 errors.ECODE_INVAL)
12684 result.append((constants.DDM_MODIFY, op, params))
12686 assert verify_fn(result)
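# Illustrative upgrade of the old two-element format: [(constants.DDM_ADD,
# params)] becomes [(constants.DDM_ADD, -1, params)], while a bare index such
# as [(0, params)] becomes [(constants.DDM_MODIFY, 0, params)].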
12693 def _CheckMods(kind, mods, key_types, item_fn):
12694 """Ensures requested disk/NIC modifications are valid.
12697 for (op, _, params) in mods:
12698 assert ht.TDict(params)
12700 utils.ForceDictType(params, key_types)
12702 if op == constants.DDM_REMOVE:
12704 raise errors.OpPrereqError("No settings should be passed when"
12705 " removing a %s" % kind,
12706 errors.ECODE_INVAL)
12707 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12708 item_fn(op, params)
12710 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12713 def _VerifyDiskModification(op, params):
12714 """Verifies a disk modification.
12717 if op == constants.DDM_ADD:
12718 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12719 if mode not in constants.DISK_ACCESS_SET:
12720 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12721 errors.ECODE_INVAL)
12723 size = params.get(constants.IDISK_SIZE, None)
12725 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12726 constants.IDISK_SIZE, errors.ECODE_INVAL)
12730 except (TypeError, ValueError), err:
12731 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12732 errors.ECODE_INVAL)
12734 params[constants.IDISK_SIZE] = size
12736 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12737 raise errors.OpPrereqError("Disk size change not possible, use"
12738 " grow-disk", errors.ECODE_INVAL)
12741 def _VerifyNicModification(op, params):
12742 """Verifies a network interface modification.
12745 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12746 ip = params.get(constants.INIC_IP, None)
12747 req_net = params.get(constants.INIC_NETWORK, None)
12748 link = params.get(constants.NIC_LINK, None)
12749 mode = params.get(constants.NIC_MODE, None)
12750 if req_net is not None:
12751 if req_net.lower() == constants.VALUE_NONE:
12752 params[constants.INIC_NETWORK] = None
12754 elif link is not None or mode is not None:
12755 raise errors.OpPrereqError("If network is given,"
12756 " mode or link should not be set",
12757 errors.ECODE_INVAL)
12759 if op == constants.DDM_ADD:
12760 macaddr = params.get(constants.INIC_MAC, None)
12761 if macaddr is None:
12762 params[constants.INIC_MAC] = constants.VALUE_AUTO
12765 if ip.lower() == constants.VALUE_NONE:
12766 params[constants.INIC_IP] = None
12768 if ip.lower() == constants.NIC_IP_POOL:
12769 if op == constants.DDM_ADD and req_net is None:
12770 raise errors.OpPrereqError("If ip=pool, parameter network"
12772 errors.ECODE_INVAL)
12774 if not netutils.IPAddress.IsValid(ip):
12775 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12776 errors.ECODE_INVAL)
12778 if constants.INIC_MAC in params:
12779 macaddr = params[constants.INIC_MAC]
12780 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12781 macaddr = utils.NormalizeAndValidateMac(macaddr)
12783 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12784 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12785 " modifying an existing NIC",
12786 errors.ECODE_INVAL)
12788 def CheckArguments(self):
12789 if not (self.op.nics or self.op.disks or self.op.disk_template or
12790 self.op.hvparams or self.op.beparams or self.op.os_name or
12791 self.op.offline is not None or self.op.runtime_mem):
12792 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12794 if self.op.hvparams:
12795 _CheckGlobalHvParams(self.op.hvparams)
12797 self.op.disks = self._UpgradeDiskNicMods(
12798 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12799 self.op.nics = self._UpgradeDiskNicMods(
12800 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12802 # Check disk modifications
12803 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12804 self._VerifyDiskModification)
12806 if self.op.disks and self.op.disk_template is not None:
12807 raise errors.OpPrereqError("Disk template conversion and other disk"
12808 " changes not supported at the same time",
12809 errors.ECODE_INVAL)
12811 if (self.op.disk_template and
12812 self.op.disk_template in constants.DTS_INT_MIRROR and
12813 self.op.remote_node is None):
12814 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12815 " one requires specifying a secondary node",
12816 errors.ECODE_INVAL)
12818 # Check NIC modifications
12819 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12820 self._VerifyNicModification)
12822 def ExpandNames(self):
12823 self._ExpandAndLockInstance()
12824 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12825 # Can't even acquire node locks in shared mode as upcoming changes in
12826 # Ganeti 2.6 will start to modify the node object on disk conversion
12827 self.needed_locks[locking.LEVEL_NODE] = []
12828 self.needed_locks[locking.LEVEL_NODE_RES] = []
12829 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12830 # Look node group to look up the ipolicy
12831 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12833 def DeclareLocks(self, level):
12834 if level == locking.LEVEL_NODEGROUP:
12835 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12836 # Acquire locks for the instance's nodegroups optimistically. Needs
12837 # to be verified in CheckPrereq
12838 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12839 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12840 elif level == locking.LEVEL_NODE:
12841 self._LockInstancesNodes()
12842 if self.op.disk_template and self.op.remote_node:
12843 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12844 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12845 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12847 self.needed_locks[locking.LEVEL_NODE_RES] = \
12848 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
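      # Descriptive note (added comment, an assumption based on the
      # surrounding code): disk template conversion needs resource locks on
      # the same set of nodes as the regular node locks, hence the node lock
      # list is copied to the NODE_RES level rather than shared.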
12850 def BuildHooksEnv(self):
12851 """Build hooks env.
12853     This runs on the master, primary and secondaries.
12855     """
12856     args = {}
12857     if constants.BE_MINMEM in self.be_new:
12858 args["minmem"] = self.be_new[constants.BE_MINMEM]
12859 if constants.BE_MAXMEM in self.be_new:
12860 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12861 if constants.BE_VCPUS in self.be_new:
12862 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12863 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12864 # information at all.
12866     if self._new_nics is not None:
12867       nics = []
12869 for nic in self._new_nics:
12870 n = copy.deepcopy(nic)
12871 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12872 n.nicparams = nicparams
12873 nics.append(_NICToTuple(self, n))
12875 args["nics"] = nics
12877 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12878 if self.op.disk_template:
12879 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12880 if self.op.runtime_mem:
12881       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12883     return env
12885 def BuildHooksNodes(self):
12886 """Build hooks nodes.
12889 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12892 def _PrepareNicModification(self, params, private, old_ip, old_net,
12893 old_params, cluster, pnode):
12895     update_params_dict = dict([(key, params[key])
12896                                for key in constants.NICS_PARAMETERS
12897                                if key in params])
12899 req_link = update_params_dict.get(constants.NIC_LINK, None)
12900 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12902 new_net = params.get(constants.INIC_NETWORK, old_net)
12903 if new_net is not None:
12904 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12905 if netparams is None:
12906 raise errors.OpPrereqError("No netparams found for the network"
12907 " %s, probably not connected" % new_net,
12908 errors.ECODE_INVAL)
12909       new_params = dict(netparams)
12910     else:
12911       new_params = _GetUpdatedParams(old_params, update_params_dict)
12913 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12915 new_filled_params = cluster.SimpleFillNIC(new_params)
12916 objects.NIC.CheckParameterSyntax(new_filled_params)
12918 new_mode = new_filled_params[constants.NIC_MODE]
12919 if new_mode == constants.NIC_MODE_BRIDGED:
12920 bridge = new_filled_params[constants.NIC_LINK]
12921 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12923 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12925 self.warn.append(msg)
12927 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12929 elif new_mode == constants.NIC_MODE_ROUTED:
12930       ip = params.get(constants.INIC_IP, old_ip)
12931       if ip is None:
12932 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12933 " on a routed NIC", errors.ECODE_INVAL)
12935 if constants.INIC_MAC in params:
12936       mac = params[constants.INIC_MAC]
12937       if mac is None:
12938 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12939 errors.ECODE_INVAL)
12940 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12941 # otherwise generate the MAC address
12942 params[constants.INIC_MAC] = \
12943           self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12944       else:
12945         # or validate/reserve the current one
12946         try:
12947           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12948 except errors.ReservationError:
12949 raise errors.OpPrereqError("MAC address '%s' already in use"
12950 " in cluster" % mac,
12951 errors.ECODE_NOTUNIQUE)
12952 elif new_net != old_net:
12954       def get_net_prefix(net):
12955         if net:
12956           uuid = self.cfg.LookupNetwork(net)
12957           if uuid:
12958             nobj = self.cfg.GetNetwork(uuid)
12959             return nobj.mac_prefix
12960         return None
12962 new_prefix = get_net_prefix(new_net)
12963 old_prefix = get_net_prefix(old_net)
12964 if old_prefix != new_prefix:
12965 params[constants.INIC_MAC] = \
12966 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12968 #if there is a change in nic-network configuration
12969 new_ip = params.get(constants.INIC_IP, old_ip)
12970 if (new_ip, new_net) != (old_ip, old_net):
12973       if new_ip.lower() == constants.NIC_IP_POOL:
12974         try:
12975           new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12976 except errors.ReservationError:
12977 raise errors.OpPrereqError("Unable to get a free IP"
12978 " from the address pool",
12979 errors.ECODE_STATE)
12980 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12981 params[constants.INIC_IP] = new_ip
12982       elif new_ip != old_ip or new_net != old_net:
12983         try:
12984           self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12985 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12986 except errors.ReservationError:
12987 raise errors.OpPrereqError("IP %s not available in network %s" %
12989 errors.ECODE_NOTUNIQUE)
12990 elif new_ip.lower() == constants.NIC_IP_POOL:
12991 raise errors.OpPrereqError("ip=pool, but no network found",
12992 errors.ECODE_INVAL)
12995 if self.op.conflicts_check:
12996 _CheckForConflictingIp(self, new_ip, pnode)
12998     # release the old IP if it came from a network's address pool
12999     if old_ip and old_net:
13000       try:
13001         self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13002       except errors.AddressPoolError:
13003         logging.warning("Release IP %s not contained in network %s",
13004                         old_ip, old_net)
13006 # there are no changes in (net, ip) tuple
13007 elif (old_net is not None and
13008 (req_link is not None or req_mode is not None)):
13009 raise errors.OpPrereqError("Not allowed to change link or mode of"
13010 " a NIC that is connected to a network",
13011 errors.ECODE_INVAL)
13013 private.params = new_params
13014 private.filled = new_filled_params
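    # Illustrative note (added comment, not from the original source): at this
    # point "private.filled" holds the fully-filled NIC parameters, e.g.
    # something like {constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
    #                 constants.NIC_LINK: "br0"} (link name hypothetical),
    # which _CreateNewNic/_ApplyNicMods later use to build change descriptions.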
13016 def CheckPrereq(self):
13017 """Check prerequisites.
13019 This only checks the instance list against the existing names.
13022 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13023 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13025 cluster = self.cluster = self.cfg.GetClusterInfo()
13026 assert self.instance is not None, \
13027 "Cannot retrieve locked instance %s" % self.op.instance_name
13029 pnode = instance.primary_node
13030 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13031 nodelist = list(instance.all_nodes)
13032 pnode_info = self.cfg.GetNodeInfo(pnode)
13033 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13035 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13036 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13037 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13039     # dictionary with instance information after the modification
13040     ispec = {}
13042 # Prepare disk/NIC modifications
13043 self.diskmod = PrepareContainerMods(self.op.disks, None)
13044 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13047 if self.op.os_name and not self.op.force:
13048 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13049 self.op.force_variant)
13050       instance_os = self.op.os_name
13051     else:
13052       instance_os = instance.os
13054 assert not (self.op.disk_template and self.op.disks), \
13055 "Can't modify disk template and apply disk changes at the same time"
13057 if self.op.disk_template:
13058 if instance.disk_template == self.op.disk_template:
13059 raise errors.OpPrereqError("Instance already has disk template %s" %
13060 instance.disk_template, errors.ECODE_INVAL)
13062 if (instance.disk_template,
13063 self.op.disk_template) not in self._DISK_CONVERSIONS:
13064 raise errors.OpPrereqError("Unsupported disk template conversion from"
13065 " %s to %s" % (instance.disk_template,
13066 self.op.disk_template),
13067 errors.ECODE_INVAL)
13068 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13069 msg="cannot change disk template")
13070 if self.op.disk_template in constants.DTS_INT_MIRROR:
13071 if self.op.remote_node == pnode:
13072 raise errors.OpPrereqError("Given new secondary node %s is the same"
13073 " as the primary node of the instance" %
13074 self.op.remote_node, errors.ECODE_STATE)
13075 _CheckNodeOnline(self, self.op.remote_node)
13076 _CheckNodeNotDrained(self, self.op.remote_node)
13077 # FIXME: here we assume that the old instance type is DT_PLAIN
13078 assert instance.disk_template == constants.DT_PLAIN
13079 disks = [{constants.IDISK_SIZE: d.size,
13080 constants.IDISK_VG: d.logical_id[0]}
13081 for d in instance.disks]
13082 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13083 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13085 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13086 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13087       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13088                                                               snode_group)
13089       _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13090 ignore=self.op.ignore_ipolicy)
13091 if pnode_info.group != snode_info.group:
13092 self.LogWarning("The primary and secondary nodes are in two"
13093 " different node groups; the disk parameters"
13094 " from the first disk's node group will be"
13097 # hvparams processing
13098 if self.op.hvparams:
13099 hv_type = instance.hypervisor
13100 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13101 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13102 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13105 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13106 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13107 self.hv_proposed = self.hv_new = hv_new # the new actual values
13108       self.hv_inst = i_hvdict  # the new dict (without defaults)
13109     else:
13110       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13111                                               instance.hvparams)
13112       self.hv_new = self.hv_inst = {}
13114 # beparams processing
13115 if self.op.beparams:
13116       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13117                                    use_none=True)
13118       objects.UpgradeBeParams(i_bedict)
13119 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13120 be_new = cluster.SimpleFillBE(i_bedict)
13121 self.be_proposed = self.be_new = be_new # the new actual values
13122       self.be_inst = i_bedict  # the new dict (without defaults)
13123     else:
13124       self.be_new = self.be_inst = {}
13125 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13126 be_old = cluster.FillBE(instance)
13128 # CPU param validation -- checking every time a parameter is
13129     # changed to cover all cases where either CPU mask or vcpus have
13130     # been changed
13131     if (constants.BE_VCPUS in self.be_proposed and
13132         constants.HV_CPU_MASK in self.hv_proposed):
13133       cpu_list = \
13134         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13135 # Verify mask is consistent with number of vCPUs. Can skip this
13136 # test if only 1 entry in the CPU mask, which means same mask
13137 # is applied to all vCPUs.
13138 if (len(cpu_list) > 1 and
13139 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13140 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13142 (self.be_proposed[constants.BE_VCPUS],
13143 self.hv_proposed[constants.HV_CPU_MASK]),
13144 errors.ECODE_INVAL)
13146 # Only perform this test if a new CPU mask is given
13147 if constants.HV_CPU_MASK in self.hv_new:
13148 # Calculate the largest CPU number requested
13149 max_requested_cpu = max(map(max, cpu_list))
13150 # Check that all of the instance's nodes have enough physical CPUs to
13151 # satisfy the requested CPU mask
13152 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13153 max_requested_cpu + 1, instance.hypervisor)
13155 # osparams processing
13156 if self.op.osparams:
13157 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13158 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13159       self.os_inst = i_osdict  # the new dict (without defaults)
13160     else:
13161       self.os_inst = {}
13165 #TODO(dynmem): do the appropriate check involving MINMEM
13166 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13167 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13168 mem_check_list = [pnode]
13169 if be_new[constants.BE_AUTO_BALANCE]:
13170 # either we changed auto_balance to yes or it was from before
13171 mem_check_list.extend(instance.secondary_nodes)
13172 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13173 instance.hypervisor)
13174 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13175 [instance.hypervisor])
13176 pninfo = nodeinfo[pnode]
13177       msg = pninfo.fail_msg
13178       if msg:
13179         # Assume the primary node is unreachable and go ahead
13180         self.warn.append("Can't get info from primary node %s: %s" %
13181                          (pnode, msg))
13182       else:
13183         (_, _, (pnhvinfo, )) = pninfo.payload
13184 if not isinstance(pnhvinfo.get("memory_free", None), int):
13185 self.warn.append("Node data from primary node %s doesn't contain"
13186 " free memory information" % pnode)
13187 elif instance_info.fail_msg:
13188 self.warn.append("Can't get instance runtime information: %s" %
13189 instance_info.fail_msg)
13190         else:
13191           if instance_info.payload:
13192             current_mem = int(instance_info.payload["memory"])
13193           else:
13194             # Assume instance not running
13195             # (there is a slight race condition here, but it's not very
13196             # probable, and we have no other way to check)
13197             # TODO: Describe race condition
13198             current_mem = 0
13199           #TODO(dynmem): do the appropriate check involving MINMEM
13200           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13201                       pnhvinfo["memory_free"])
13202           if miss_mem > 0:
13203             raise errors.OpPrereqError("This change will prevent the instance"
13204 " from starting, due to %d MB of memory"
13205 " missing on its primary node" %
13206 miss_mem, errors.ECODE_NORES)
13208 if be_new[constants.BE_AUTO_BALANCE]:
13209 for node, nres in nodeinfo.items():
13210           if node not in instance.secondary_nodes:
13211             continue
13212 nres.Raise("Can't get info from secondary node %s" % node,
13213 prereq=True, ecode=errors.ECODE_STATE)
13214 (_, _, (nhvinfo, )) = nres.payload
13215 if not isinstance(nhvinfo.get("memory_free", None), int):
13216 raise errors.OpPrereqError("Secondary node %s didn't return free"
13217 " memory information" % node,
13218 errors.ECODE_STATE)
13219 #TODO(dynmem): do the appropriate check involving MINMEM
13220 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13221 raise errors.OpPrereqError("This change will prevent the instance"
13222 " from failover to its secondary node"
13223 " %s, due to not enough memory" % node,
13224 errors.ECODE_STATE)
13226 if self.op.runtime_mem:
13227       remote_info = self.rpc.call_instance_info(instance.primary_node,
13228                                                 instance.name,
13229                                                 instance.hypervisor)
13230 remote_info.Raise("Error checking node %s" % instance.primary_node)
13231 if not remote_info.payload: # not running already
13232 raise errors.OpPrereqError("Instance %s is not running" %
13233 instance.name, errors.ECODE_STATE)
13235 current_memory = remote_info.payload["memory"]
13236 if (not self.op.force and
13237 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13238 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13239 raise errors.OpPrereqError("Instance %s must have memory between %d"
13240 " and %d MB of memory unless --force is"
13243 self.be_proposed[constants.BE_MINMEM],
13244 self.be_proposed[constants.BE_MAXMEM]),
13245 errors.ECODE_INVAL)
13247       delta = self.op.runtime_mem - current_memory
13248       if delta > 0:
13249         _CheckNodeFreeMemory(self, instance.primary_node,
13250 "ballooning memory for instance %s" %
13251 instance.name, delta, instance.hypervisor)
13253 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13254 raise errors.OpPrereqError("Disk operations not supported for"
13255 " diskless instances", errors.ECODE_INVAL)
13257 def _PrepareNicCreate(_, params, private):
13258 self._PrepareNicModification(params, private, None, None,
13259 {}, cluster, pnode)
13260 return (None, None)
13262 def _PrepareNicMod(_, nic, params, private):
13263 self._PrepareNicModification(params, private, nic.ip, nic.network,
13264 nic.nicparams, cluster, pnode)
13267 def _PrepareNicRemove(_, params, __):
13268       ip = params.ip
13269       net = params.network
13270 if net is not None and ip is not None:
13271 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13273 # Verify NIC changes (operating on copy)
13274 nics = instance.nics[:]
13275 ApplyContainerMods("NIC", nics, None, self.nicmod,
13276 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13277 if len(nics) > constants.MAX_NICS:
13278 raise errors.OpPrereqError("Instance has too many network interfaces"
13279 " (%d), cannot add more" % constants.MAX_NICS,
13280 errors.ECODE_STATE)
13282 # Verify disk changes (operating on a copy)
13283 disks = instance.disks[:]
13284 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13285 if len(disks) > constants.MAX_DISKS:
13286 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13287 " more" % constants.MAX_DISKS,
13288 errors.ECODE_STATE)
13289 disk_sizes = [disk.size for disk in instance.disks]
13290     disk_sizes.extend(params["size"] for (op, idx, params, private) in
13291                       self.diskmod if op == constants.DDM_ADD)
13292 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13293 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13295 if self.op.offline is not None:
13296 if self.op.offline:
13297 msg = "can't change to offline"
13299 msg = "can't change to online"
13300 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13302 # Pre-compute NIC changes (necessary to use result in hooks)
13303     self._nic_chgdesc = []
13304     if self.nicmod:
13305       # Operate on copies as this is still in prereq
13306 nics = [nic.Copy() for nic in instance.nics]
13307 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13308 self._CreateNewNic, self._ApplyNicMods, None)
13309 self._new_nics = nics
13310       ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13311     else:
13312       self._new_nics = None
13313 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13315 if not self.op.ignore_ipolicy:
13316       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13317                                                               group_info)
13319       # Fill ispec with backend parameters
13320 ispec[constants.ISPEC_SPINDLE_USE] = \
13321 self.be_new.get(constants.BE_SPINDLE_USE, None)
13322       ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13323                                                          None)
13325       # Copy ispec to verify parameters with min/max values separately
13326 ispec_max = ispec.copy()
13327 ispec_max[constants.ISPEC_MEM_SIZE] = \
13328 self.be_new.get(constants.BE_MAXMEM, None)
13329 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13330 ispec_min = ispec.copy()
13331 ispec_min[constants.ISPEC_MEM_SIZE] = \
13332 self.be_new.get(constants.BE_MINMEM, None)
13333 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13335 if (res_max or res_min):
13336 # FIXME: Improve error message by including information about whether
13337 # the upper or lower limit of the parameter fails the ipolicy.
13338 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13339 (group_info, group_info.name,
13340 utils.CommaJoin(set(res_max + res_min))))
13341 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
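    # Illustrative sketch (added comment, not from the original source): the
    # "ispec" dict checked against the group policy above roughly looks like
    #   {constants.ISPEC_MEM_SIZE: 1024, constants.ISPEC_CPU_COUNT: 2,
    #    constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: [20480],
    #    constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1}
    # with the memory value swapped for BE_MINMEM/BE_MAXMEM in ispec_min/max.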
13343 def _ConvertPlainToDrbd(self, feedback_fn):
13344 """Converts an instance from plain to drbd.
13347 feedback_fn("Converting template to drbd")
13348 instance = self.instance
13349 pnode = instance.primary_node
13350 snode = self.op.remote_node
13352 assert instance.disk_template == constants.DT_PLAIN
13354 # create a fake disk info for _GenerateDiskTemplate
13355 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13356 constants.IDISK_VG: d.logical_id[0]}
13357 for d in instance.disks]
13358 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13359 instance.name, pnode, [snode],
13360                                       disk_info, None, None, 0, feedback_fn,
13361                                       self.diskparams)
13362     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13363                                         self.diskparams)
13364     info = _GetInstanceInfoText(instance)
13365 feedback_fn("Creating additional volumes...")
13366 # first, create the missing data and meta devices
13367 for disk in anno_disks:
13368 # unfortunately this is... not too nice
13369       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13370                             info, True)
13371 for child in disk.children:
13372 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13373 # at this stage, all new LVs have been created, we can rename the
13375 feedback_fn("Renaming original volumes...")
13376 rename_list = [(o, n.children[0].logical_id)
13377 for (o, n) in zip(instance.disks, new_disks)]
13378 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13379 result.Raise("Failed to rename original LVs")
13381 feedback_fn("Initializing DRBD devices...")
13382 # all child devices are in place, we can now create the DRBD devices
13383 for disk in anno_disks:
13384 for node in [pnode, snode]:
13385 f_create = node == pnode
13386 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13388 # at this point, the instance has been modified
13389 instance.disk_template = constants.DT_DRBD8
13390 instance.disks = new_disks
13391 self.cfg.Update(instance, feedback_fn)
13393 # Release node locks while waiting for sync
13394 _ReleaseLocks(self, locking.LEVEL_NODE)
13396 # disks are created, waiting for sync
13397 disk_abort = not _WaitForSync(self, instance,
13398                                   oneshot=not self.op.wait_for_sync)
13399     if disk_abort:
13400       raise errors.OpExecError("There are some degraded disks for"
13401 " this instance, please cleanup manually")
13403 # Node resource locks will be released by caller
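    # Usage sketch (added comment, not from the original source): this
    # conversion path backs "gnt-instance modify -t drbd -n <secondary-node>
    # <instance>", i.e. OpInstanceSetParams with disk_template set to
    # constants.DT_DRBD8 and remote_node pointing at the new secondary.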
13405 def _ConvertDrbdToPlain(self, feedback_fn):
13406 """Converts an instance from drbd to plain.
13409 instance = self.instance
13411 assert len(instance.secondary_nodes) == 1
13412 assert instance.disk_template == constants.DT_DRBD8
13414 pnode = instance.primary_node
13415 snode = instance.secondary_nodes[0]
13416 feedback_fn("Converting template to plain")
13418 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13419 new_disks = [d.children[0] for d in instance.disks]
13421 # copy over size and mode
13422 for parent, child in zip(old_disks, new_disks):
13423 child.size = parent.size
13424 child.mode = parent.mode
13426 # this is a DRBD disk, return its port to the pool
13427 # NOTE: this must be done right before the call to cfg.Update!
13428 for disk in old_disks:
13429 tcp_port = disk.logical_id[2]
13430 self.cfg.AddTcpUdpPort(tcp_port)
13432 # update instance structure
13433 instance.disks = new_disks
13434 instance.disk_template = constants.DT_PLAIN
13435 self.cfg.Update(instance, feedback_fn)
13437 # Release locks in case removing disks takes a while
13438 _ReleaseLocks(self, locking.LEVEL_NODE)
13440 feedback_fn("Removing volumes on the secondary node...")
13441 for disk in old_disks:
13442 self.cfg.SetDiskID(disk, snode)
13443       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13444       if msg:
13445         self.LogWarning("Could not remove block device %s on node %s,"
13446 " continuing anyway: %s", disk.iv_name, snode, msg)
13448 feedback_fn("Removing unneeded volumes on the primary node...")
13449 for idx, disk in enumerate(old_disks):
13450 meta = disk.children[1]
13451 self.cfg.SetDiskID(meta, pnode)
13452       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13453       if msg:
13454         self.LogWarning("Could not remove metadata for disk %d on node %s,"
13455 " continuing anyway: %s", idx, pnode, msg)
13457 def _CreateNewDisk(self, idx, params, _):
13458 """Creates a new disk.
13461 instance = self.instance
13464 if instance.disk_template in constants.DTS_FILEBASED:
13465 (file_driver, file_path) = instance.disks[0].logical_id
13466 file_path = os.path.dirname(file_path)
13468 file_driver = file_path = None
13471 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13472 instance.primary_node, instance.secondary_nodes,
13473 [params], file_path, file_driver, idx,
13474 self.Log, self.diskparams)[0]
13476 info = _GetInstanceInfoText(instance)
13478 logging.info("Creating volume %s for instance %s",
13479 disk.iv_name, instance.name)
13480 # Note: this needs to be kept in sync with _CreateDisks
13482 for node in instance.all_nodes:
13483       f_create = (node == instance.primary_node)
13484       try:
13485         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13486       except errors.OpExecError, err:
13487         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13488                         disk.iv_name, disk, node, err)
13490     return (disk, [
13491       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13492       ])
13494   @staticmethod
13495   def _ModifyDisk(idx, disk, params, _):
13496 """Modifies a disk.
13499 disk.mode = params[constants.IDISK_MODE]
13502 ("disk.mode/%d" % idx, disk.mode),
13505 def _RemoveDisk(self, idx, root, _):
13509 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13510 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13511 self.cfg.SetDiskID(disk, node)
13512       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13513       if msg:
13514 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13515 " continuing anyway", idx, node, msg)
13517 # if this is a DRBD disk, return its port to the pool
13518 if root.dev_type in constants.LDS_DRBD:
13519 self.cfg.AddTcpUdpPort(root.logical_id[2])
13521   @staticmethod
13522   def _CreateNewNic(idx, params, private):
13523 """Creates data structure for a new network interface.
13526 mac = params[constants.INIC_MAC]
13527 ip = params.get(constants.INIC_IP, None)
13528 net = params.get(constants.INIC_NETWORK, None)
13529 #TODO: not private.filled?? can a nic have no nicparams??
13530 nicparams = private.filled
13532     return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13533       ("nic.%d" % idx,
13534        "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13535        (mac, ip, private.filled[constants.NIC_MODE],
13536         private.filled[constants.NIC_LINK],
13537         net)),
13538       ])
13540   @staticmethod
13541   def _ApplyNicMods(idx, nic, params, private):
13542 """Modifies a network interface.
13547 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13549 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13550 setattr(nic, key, params[key])
13553 nic.nicparams = private.filled
13555 for (key, val) in nic.nicparams.items():
13556 changes.append(("nic.%s/%d" % (key, idx), val))
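    # Illustrative sketch (added comment, not from the original source): the
    # change list returned above feeds the opcode result; modifying NIC 0
    # might yield entries such as [("nic.mac/0", "aa:00:00:11:22:33"),
    # ("nic.mode/0", "bridged"), ("nic.link/0", "xen-br0")] (values are
    # hypothetical).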
13560 def Exec(self, feedback_fn):
13561 """Modifies an instance.
13563 All parameters take effect only at the next restart of the instance.
13566 # Process here the warnings from CheckPrereq, as we don't have a
13567 # feedback_fn there.
13568 # TODO: Replace with self.LogWarning
13569 for warn in self.warn:
13570 feedback_fn("WARNING: %s" % warn)
13572 assert ((self.op.disk_template is None) ^
13573 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13574 "Not owning any node resource locks"
13577 instance = self.instance
13580 if self.op.runtime_mem:
13581       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13582                                                      instance,
13583                                                      self.op.runtime_mem)
13584 rpcres.Raise("Cannot modify instance runtime memory")
13585 result.append(("runtime_memory", self.op.runtime_mem))
13587 # Apply disk changes
13588 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13589 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13590 _UpdateIvNames(0, instance.disks)
13592 if self.op.disk_template:
13594 check_nodes = set(instance.all_nodes)
13595 if self.op.remote_node:
13596 check_nodes.add(self.op.remote_node)
13597 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13598 owned = self.owned_locks(level)
13599 assert not (check_nodes - owned), \
13600 ("Not owning the correct locks, owning %r, expected at least %r" %
13601 (owned, check_nodes))
13603       r_shut = _ShutdownInstanceDisks(self, instance)
13604       if not r_shut:
13605         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13606                                  " proceed with disk template conversion")
13607       mode = (instance.disk_template, self.op.disk_template)
13608       try:
13609         self._DISK_CONVERSIONS[mode](self, feedback_fn)
13610       except:
13611         self.cfg.ReleaseDRBDMinors(instance.name)
13612         raise
13613       result.append(("disk_template", self.op.disk_template))
13615 assert instance.disk_template == self.op.disk_template, \
13616 ("Expected disk template '%s', found '%s'" %
13617 (self.op.disk_template, instance.disk_template))
13619 # Release node and resource locks if there are any (they might already have
13620 # been released during disk conversion)
13621 _ReleaseLocks(self, locking.LEVEL_NODE)
13622 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13624 # Apply NIC changes
13625 if self._new_nics is not None:
13626 instance.nics = self._new_nics
13627 result.extend(self._nic_chgdesc)
13630 if self.op.hvparams:
13631 instance.hvparams = self.hv_inst
13632 for key, val in self.op.hvparams.iteritems():
13633 result.append(("hv/%s" % key, val))
13636 if self.op.beparams:
13637 instance.beparams = self.be_inst
13638 for key, val in self.op.beparams.iteritems():
13639 result.append(("be/%s" % key, val))
13642 if self.op.os_name:
13643 instance.os = self.op.os_name
13646 if self.op.osparams:
13647 instance.osparams = self.os_inst
13648 for key, val in self.op.osparams.iteritems():
13649 result.append(("os/%s" % key, val))
13651     if self.op.offline is None:
13652       # Ignore
13653       pass
13654 elif self.op.offline:
13655 # Mark instance as offline
13656 self.cfg.MarkInstanceOffline(instance.name)
13657 result.append(("admin_state", constants.ADMINST_OFFLINE))
13659 # Mark instance as online, but stopped
13660 self.cfg.MarkInstanceDown(instance.name)
13661 result.append(("admin_state", constants.ADMINST_DOWN))
13663 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13665 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13666 self.owned_locks(locking.LEVEL_NODE)), \
13667 "All node locks should have been released by now"
13671 _DISK_CONVERSIONS = {
13672 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13673     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13674     }
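  # Descriptive note (added comment, not from the original source): only the
  # plain <-> drbd conversions are supported; CheckPrereq rejects any
  # (old_template, new_template) pair not present in this mapping, and Exec
  # looks the requested pair up here to run the matching conversion helper.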
13677 class LUInstanceChangeGroup(LogicalUnit):
13678 HPATH = "instance-change-group"
13679 HTYPE = constants.HTYPE_INSTANCE
13682 def ExpandNames(self):
13683 self.share_locks = _ShareAll()
13684 self.needed_locks = {
13685 locking.LEVEL_NODEGROUP: [],
13686       locking.LEVEL_NODE: [],
13687       }
13689 self._ExpandAndLockInstance()
13691 if self.op.target_groups:
13692 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13693                                   self.op.target_groups)
13694     else:
13695       self.req_target_uuids = None
13697 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13699 def DeclareLocks(self, level):
13700 if level == locking.LEVEL_NODEGROUP:
13701 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13703 if self.req_target_uuids:
13704 lock_groups = set(self.req_target_uuids)
13706 # Lock all groups used by instance optimistically; this requires going
13707 # via the node before it's locked, requiring verification later on
13708 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13709         lock_groups.update(instance_groups)
13710       else:
13711         # No target groups, need to lock all of them
13712 lock_groups = locking.ALL_SET
13714 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13716 elif level == locking.LEVEL_NODE:
13717 if self.req_target_uuids:
13718 # Lock all nodes used by instances
13719 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13720 self._LockInstancesNodes()
13722 # Lock all nodes in all potential target groups
13723 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13724 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13725 member_nodes = [node_name
13726 for group in lock_groups
13727 for node_name in self.cfg.GetNodeGroup(group).members]
13728         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13729       else:
13730         # Lock all nodes as all groups are potential targets
13731 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13733 def CheckPrereq(self):
13734 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13735 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13736 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13738 assert (self.req_target_uuids is None or
13739 owned_groups.issuperset(self.req_target_uuids))
13740 assert owned_instances == set([self.op.instance_name])
13742 # Get instance information
13743 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13745 # Check if node groups for locked instance are still correct
13746 assert owned_nodes.issuperset(self.instance.all_nodes), \
13747 ("Instance %s's nodes changed while we kept the lock" %
13748 self.op.instance_name)
13750     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13751                                            owned_groups)
13753 if self.req_target_uuids:
13754 # User requested specific target groups
13755       self.target_uuids = frozenset(self.req_target_uuids)
13756     else:
13757       # All groups except those used by the instance are potential targets
13758 self.target_uuids = owned_groups - inst_groups
13760 conflicting_groups = self.target_uuids & inst_groups
13761 if conflicting_groups:
13762 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13763 " used by the instance '%s'" %
13764 (utils.CommaJoin(conflicting_groups),
13765 self.op.instance_name),
13766 errors.ECODE_INVAL)
13768 if not self.target_uuids:
13769 raise errors.OpPrereqError("There are no possible target groups",
13770 errors.ECODE_INVAL)
13772 def BuildHooksEnv(self):
13773 """Build hooks env.
13776 assert self.target_uuids
13779 "TARGET_GROUPS": " ".join(self.target_uuids),
13782 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13786 def BuildHooksNodes(self):
13787 """Build hooks nodes.
13790 mn = self.cfg.GetMasterNode()
13791 return ([mn], [mn])
13793 def Exec(self, feedback_fn):
13794 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13796 assert instances == [self.op.instance_name], "Instance not locked"
13798 req = iallocator.IAReqGroupChange(instances=instances,
13799 target_groups=list(self.target_uuids))
13800 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13802 ial.Run(self.op.iallocator)
13804 if not ial.success:
13805 raise errors.OpPrereqError("Can't compute solution for changing group of"
13806 " instance '%s' using iallocator '%s': %s" %
13807 (self.op.instance_name, self.op.iallocator,
13808 ial.info), errors.ECODE_NORES)
13810 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13812 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13813 " instance '%s'", len(jobs), self.op.instance_name)
13815 return ResultWithJobs(jobs)
13818 class LUBackupQuery(NoHooksLU):
13819 """Query the exports list
13824 def CheckArguments(self):
13825 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13826 ["node", "export"], self.op.use_locking)
13828 def ExpandNames(self):
13829 self.expq.ExpandNames(self)
13831 def DeclareLocks(self, level):
13832 self.expq.DeclareLocks(self, level)
13834   def Exec(self, feedback_fn):
13835     result = {}
13837     for (node, expname) in self.expq.OldStyleQuery(self):
13838       if expname is None:
13839         result[node] = False
13840       else:
13841         result.setdefault(node, []).append(expname)
13843     return result
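  # Illustrative sketch (added comment, not from the original source): the
  # mapping returned above uses False for nodes whose export list could not
  # be retrieved and a list of export names otherwise, e.g.
  #   {"node1": ["inst1.example.com"], "node2": False}
  # (node and instance names are hypothetical).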
13846 class _ExportQuery(_QueryBase):
13847 FIELDS = query.EXPORT_FIELDS
13849 #: The node name is not a unique key for this query
13850 SORT_FIELD = "node"
13852 def ExpandNames(self, lu):
13853 lu.needed_locks = {}
13855     # The following variables interact with _QueryBase._GetNames
13856     if self.names:
13857       self.wanted = _GetWantedNodes(lu, self.names)
13858     else:
13859       self.wanted = locking.ALL_SET
13861 self.do_locking = self.use_locking
13863 if self.do_locking:
13864 lu.share_locks = _ShareAll()
13865 lu.needed_locks = {
13866         locking.LEVEL_NODE: self.wanted,
13867         }
13869   def DeclareLocks(self, lu, level):
13870     pass
13872 def _GetQueryData(self, lu):
13873 """Computes the list of nodes and their attributes.
13876 # Locking is not used
13878 assert not (compat.any(lu.glm.is_owned(level)
13879 for level in locking.LEVELS
13880 if level != locking.LEVEL_CLUSTER) or
13881 self.do_locking or self.use_locking)
13883     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13885     result = []
13887     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13888       if nres.fail_msg:
13889         result.append((node, None))
13890       else:
13891         result.extend((node, expname) for expname in nres.payload)
13893     return result
13896 class LUBackupPrepare(NoHooksLU):
13897 """Prepares an instance for an export and returns useful information.
13902 def ExpandNames(self):
13903 self._ExpandAndLockInstance()
13905 def CheckPrereq(self):
13906 """Check prerequisites.
13909 instance_name = self.op.instance_name
13911 self.instance = self.cfg.GetInstanceInfo(instance_name)
13912 assert self.instance is not None, \
13913 "Cannot retrieve locked instance %s" % self.op.instance_name
13914 _CheckNodeOnline(self, self.instance.primary_node)
13916 self._cds = _GetClusterDomainSecret()
13918 def Exec(self, feedback_fn):
13919 """Prepares an instance for an export.
13922 instance = self.instance
13924 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13925 salt = utils.GenerateSecret(8)
13927 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13928 result = self.rpc.call_x509_cert_create(instance.primary_node,
13929 constants.RIE_CERT_VALIDITY)
13930 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13932 (name, cert_pem) = result.payload
13934       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13935                                              cert_pem)
13937       return {
13938         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13939         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13940                           salt),
13941         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13942         }
13944     return None
13947 class LUBackupExport(LogicalUnit):
13948 """Export an instance to an image in the cluster.
13951 HPATH = "instance-export"
13952 HTYPE = constants.HTYPE_INSTANCE
13955 def CheckArguments(self):
13956 """Check the arguments.
13959 self.x509_key_name = self.op.x509_key_name
13960 self.dest_x509_ca_pem = self.op.destination_x509_ca
13962 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13963 if not self.x509_key_name:
13964 raise errors.OpPrereqError("Missing X509 key name for encryption",
13965 errors.ECODE_INVAL)
13967 if not self.dest_x509_ca_pem:
13968 raise errors.OpPrereqError("Missing destination X509 CA",
13969 errors.ECODE_INVAL)
13971 def ExpandNames(self):
13972 self._ExpandAndLockInstance()
13974 # Lock all nodes for local exports
13975 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13976 # FIXME: lock only instance primary and destination node
13978 # Sad but true, for now we have do lock all nodes, as we don't know where
13979 # the previous export might be, and in this LU we search for it and
13980 # remove it from its current node. In the future we could fix this by:
13981 # - making a tasklet to search (share-lock all), then create the
13982 # new one, then one to remove, after
13983 # - removing the removal operation altogether
13984 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13986 def DeclareLocks(self, level):
13987 """Last minute lock declaration."""
13988 # All nodes are locked anyway, so nothing to do here.
13990 def BuildHooksEnv(self):
13991 """Build hooks env.
13993     This will run on the master, primary node and target node.
13995     """
13996     env = {
13997       "EXPORT_MODE": self.op.mode,
13998 "EXPORT_NODE": self.op.target_node,
13999 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14000 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14001 # TODO: Generic function for boolean env variables
14002 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14005 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14009 def BuildHooksNodes(self):
14010 """Build hooks nodes.
14013 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14015 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14016       nl.append(self.op.target_node)
14018     return (nl, nl)
14020 def CheckPrereq(self):
14021 """Check prerequisites.
14023 This checks that the instance and node names are valid.
14026 instance_name = self.op.instance_name
14028 self.instance = self.cfg.GetInstanceInfo(instance_name)
14029 assert self.instance is not None, \
14030 "Cannot retrieve locked instance %s" % self.op.instance_name
14031 _CheckNodeOnline(self, self.instance.primary_node)
14033 if (self.op.remove_instance and
14034 self.instance.admin_state == constants.ADMINST_UP and
14035 not self.op.shutdown):
14036 raise errors.OpPrereqError("Can not remove instance without shutting it"
14037 " down before", errors.ECODE_STATE)
14039 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14040 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14041 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14042 assert self.dst_node is not None
14044 _CheckNodeOnline(self, self.dst_node.name)
14045 _CheckNodeNotDrained(self, self.dst_node.name)
14048 self.dest_disk_info = None
14049 self.dest_x509_ca = None
14051 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14052 self.dst_node = None
14054 if len(self.op.target_node) != len(self.instance.disks):
14055 raise errors.OpPrereqError(("Received destination information for %s"
14056 " disks, but instance %s has %s disks") %
14057 (len(self.op.target_node), instance_name,
14058 len(self.instance.disks)),
14059 errors.ECODE_INVAL)
14061 cds = _GetClusterDomainSecret()
14063 # Check X509 key name
14065 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14066 except (TypeError, ValueError), err:
14067 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14068 errors.ECODE_INVAL)
14070 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14071 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14072 errors.ECODE_INVAL)
14074 # Load and verify CA
14076 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14077 except OpenSSL.crypto.Error, err:
14078 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14079 (err, ), errors.ECODE_INVAL)
14081 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14082 if errcode is not None:
14083 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14084 (msg, ), errors.ECODE_INVAL)
14086 self.dest_x509_ca = cert
14088       # Verify target information
14089       disk_info = []
14090       for idx, disk_data in enumerate(self.op.target_node):
14091         try:
14092           (host, port, magic) = \
14093 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14094 except errors.GenericError, err:
14095 raise errors.OpPrereqError("Target info for disk %s: %s" %
14096 (idx, err), errors.ECODE_INVAL)
14098 disk_info.append((host, port, magic))
14100 assert len(disk_info) == len(self.op.target_node)
14101       self.dest_disk_info = disk_info
14103     else:
14104       raise errors.ProgrammerError("Unhandled export mode %r" %
14105                                    self.op.mode)
14107 # instance disk type verification
14108 # TODO: Implement export support for file-based disks
14109 for disk in self.instance.disks:
14110 if disk.dev_type == constants.LD_FILE:
14111 raise errors.OpPrereqError("Export not supported for instances with"
14112 " file-based disks", errors.ECODE_INVAL)
14114 def _CleanupExports(self, feedback_fn):
14115 """Removes exports of current instance from all other nodes.
14117 If an instance in a cluster with nodes A..D was exported to node C, its
14118 exports will be removed from the nodes A, B and D.
14121 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14123 nodelist = self.cfg.GetNodeList()
14124 nodelist.remove(self.dst_node.name)
14126 # on one-node clusters nodelist will be empty after the removal
14127 # if we proceed the backup would be removed because OpBackupQuery
14128 # substitutes an empty list with the full cluster node list.
14129     iname = self.instance.name
14130     if nodelist:
14131       feedback_fn("Removing old exports for instance %s" % iname)
14132       exportlist = self.rpc.call_export_list(nodelist)
14133       for node in exportlist:
14134         if exportlist[node].fail_msg:
14135           continue
14136         if iname in exportlist[node].payload:
14137           msg = self.rpc.call_export_remove(node, iname).fail_msg
14138           if msg:
14139             self.LogWarning("Could not remove older export for instance %s"
14140 " on node %s: %s", iname, node, msg)
14142 def Exec(self, feedback_fn):
14143 """Export an instance to an image in the cluster.
14146 assert self.op.mode in constants.EXPORT_MODES
14148 instance = self.instance
14149 src_node = instance.primary_node
14151 if self.op.shutdown:
14152 # shutdown the instance, but not the disks
14153 feedback_fn("Shutting down instance %s" % instance.name)
14154 result = self.rpc.call_instance_shutdown(src_node, instance,
14155 self.op.shutdown_timeout)
14156 # TODO: Maybe ignore failures if ignore_remove_failures is set
14157 result.Raise("Could not shutdown instance %s on"
14158 " node %s" % (instance.name, src_node))
14160 # set the disks ID correctly since call_instance_start needs the
14161 # correct drbd minor to create the symlinks
14162 for disk in instance.disks:
14163 self.cfg.SetDiskID(disk, src_node)
14165     activate_disks = (instance.admin_state != constants.ADMINST_UP)
14167     if activate_disks:
14168       # Activate the instance disks if we're exporting a stopped instance
14169 feedback_fn("Activating disks for %s" % instance.name)
14170 _StartInstanceDisks(self, instance, None)
14173       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14174                                                      instance)
14176       helper.CreateSnapshots()
14178 if (self.op.shutdown and
14179 instance.admin_state == constants.ADMINST_UP and
14180 not self.op.remove_instance):
14181 assert not activate_disks
14182 feedback_fn("Starting instance %s" % instance.name)
14183 result = self.rpc.call_instance_start(src_node,
14184 (instance, None, None), False)
14185       msg = result.fail_msg
14186       if msg:
14187         feedback_fn("Failed to start instance: %s" % msg)
14188 _ShutdownInstanceDisks(self, instance)
14189 raise errors.OpExecError("Could not start instance: %s" % msg)
14191 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14192 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14193 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14194 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14195 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14197       (key_name, _, _) = self.x509_key_name
14199       dest_ca_pem = \
14200         OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14201                                         self.dest_x509_ca)
14203       (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14204                                                  key_name, dest_ca_pem,
14205                                                  timeouts)
14209 # Check for backwards compatibility
14210 assert len(dresults) == len(instance.disks)
14211 assert compat.all(isinstance(i, bool) for i in dresults), \
14212 "Not all results are boolean: %r" % dresults
14216 feedback_fn("Deactivating disks for %s" % instance.name)
14217 _ShutdownInstanceDisks(self, instance)
14219 if not (compat.all(dresults) and fin_resu):
14222 failures.append("export finalization")
14223 if not compat.all(dresults):
14224         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14225                                if not dsk)
14226         failures.append("disk export: disk(s) %s" % fdsk)
14228 raise errors.OpExecError("Export failed, errors in %s" %
14229 utils.CommaJoin(failures))
14231 # At this point, the export was successful, we can cleanup/finish
14233 # Remove instance if requested
14234 if self.op.remove_instance:
14235 feedback_fn("Removing instance %s" % instance.name)
14236 _RemoveInstance(self, feedback_fn, instance,
14237 self.op.ignore_remove_failures)
14239 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14240 self._CleanupExports(feedback_fn)
14242 return fin_resu, dresults
14245 class LUBackupRemove(NoHooksLU):
14246 """Remove exports related to the named instance.
14251 def ExpandNames(self):
14252 self.needed_locks = {}
14253 # We need all nodes to be locked in order for RemoveExport to work, but we
14254 # don't need to lock the instance itself, as nothing will happen to it (and
14255 # we can remove exports also for a removed instance)
14256 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14258 def Exec(self, feedback_fn):
14259 """Remove any export.
14262 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14263 # If the instance was not found we'll try with the name that was passed in.
14264 # This will only work if it was an FQDN, though.
14265     fqdn_warn = False
14266     if not instance_name:
14267       fqdn_warn = True
14268       instance_name = self.op.instance_name
14270 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14271     exportlist = self.rpc.call_export_list(locked_nodes)
14272     found = False
14273     for node in exportlist:
14274       msg = exportlist[node].fail_msg
14275       if msg:
14276         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14277         continue
14278       if instance_name in exportlist[node].payload:
14279         found = True
14280         result = self.rpc.call_export_remove(node, instance_name)
14281         msg = result.fail_msg
14282         if msg:
14283           logging.error("Could not remove export for instance %s"
14284                         " on node %s: %s", instance_name, node, msg)
14286 if fqdn_warn and not found:
14287 feedback_fn("Export not found. If trying to remove an export belonging"
14288 " to a deleted instance please use its Fully Qualified"
14292 class LUGroupAdd(LogicalUnit):
14293 """Logical unit for creating node groups.
14296 HPATH = "group-add"
14297 HTYPE = constants.HTYPE_GROUP
14300 def ExpandNames(self):
14301 # We need the new group's UUID here so that we can create and acquire the
14302 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14303 # that it should not check whether the UUID exists in the configuration.
14304 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14305 self.needed_locks = {}
14306 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14308 def CheckPrereq(self):
14309 """Check prerequisites.
14311     This checks that the given group name is not an existing node group
14312     already.
14314     """
14315     try:
14316       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14317     except errors.OpPrereqError:
14318       pass
14319     else:
14320       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14321 " node group (UUID: %s)" %
14322 (self.op.group_name, existing_uuid),
14323 errors.ECODE_EXISTS)
14325 if self.op.ndparams:
14326 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14328 if self.op.hv_state:
14329       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14330     else:
14331       self.new_hv_state = None
14333 if self.op.disk_state:
14334       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14335     else:
14336       self.new_disk_state = None
14338 if self.op.diskparams:
14339 for templ in constants.DISK_TEMPLATES:
14340 if templ in self.op.diskparams:
14341 utils.ForceDictType(self.op.diskparams[templ],
14342 constants.DISK_DT_TYPES)
14343       self.new_diskparams = self.op.diskparams
14344       try:
14345         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14346 except errors.OpPrereqError, err:
14347 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14348 errors.ECODE_INVAL)
14350 self.new_diskparams = {}
14352 if self.op.ipolicy:
14353 cluster = self.cfg.GetClusterInfo()
14354 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14356 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14357 except errors.ConfigurationError, err:
14358 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14359 errors.ECODE_INVAL)
14361 def BuildHooksEnv(self):
14362 """Build hooks env.
14366 "GROUP_NAME": self.op.group_name,
14369 def BuildHooksNodes(self):
14370 """Build hooks nodes.
14373 mn = self.cfg.GetMasterNode()
14374 return ([mn], [mn])
14376 def Exec(self, feedback_fn):
14377 """Add the node group to the cluster.
14380 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14381 uuid=self.group_uuid,
14382 alloc_policy=self.op.alloc_policy,
14383 ndparams=self.op.ndparams,
14384 diskparams=self.new_diskparams,
14385 ipolicy=self.op.ipolicy,
14386 hv_state_static=self.new_hv_state,
14387 disk_state_static=self.new_disk_state)
14389 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14390 del self.remove_locks[locking.LEVEL_NODEGROUP]
14393 class LUGroupAssignNodes(NoHooksLU):
14394 """Logical unit for assigning nodes to groups.
14399 def ExpandNames(self):
14400 # These raise errors.OpPrereqError on their own:
14401 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14402 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14404 # We want to lock all the affected nodes and groups. We have readily
14405 # available the list of nodes, and the *destination* group. To gather the
14406 # list of "source" groups, we need to fetch node information later on.
14407 self.needed_locks = {
14408 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14409     locking.LEVEL_NODE: self.op.nodes,
14410     }
14412 def DeclareLocks(self, level):
14413 if level == locking.LEVEL_NODEGROUP:
14414 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14416 # Try to get all affected nodes' groups without having the group or node
14417 # lock yet. Needs verification later in the code flow.
14418 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14420 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14422 def CheckPrereq(self):
14423 """Check prerequisites.
14426 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14427 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14428 frozenset(self.op.nodes))
14430 expected_locks = (set([self.group_uuid]) |
14431 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14432 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14433 if actual_locks != expected_locks:
14434 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14435 " current groups are '%s', used to be '%s'" %
14436 (utils.CommaJoin(expected_locks),
14437 utils.CommaJoin(actual_locks)))
14439 self.node_data = self.cfg.GetAllNodesInfo()
14440 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14441 instance_data = self.cfg.GetAllInstancesInfo()
14443 if self.group is None:
14444 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14445 (self.op.group_name, self.group_uuid))
14447 (new_splits, previous_splits) = \
14448 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14449 for node in self.op.nodes],
14450 self.node_data, instance_data)
14452     if new_splits:
14453       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14455       if not self.op.force:
14456         raise errors.OpExecError("The following instances get split by this"
14457                                  " change and --force was not given: %s" %
14458                                  fmt_new_splits)
14459       else:
14460         self.LogWarning("This operation will split the following instances: %s",
14461                         fmt_new_splits)
14463         if previous_splits:
14464           self.LogWarning("In addition, these already-split instances continue"
14465                           " to be split across groups: %s",
14466                           utils.CommaJoin(utils.NiceSort(previous_splits)))
14468 def Exec(self, feedback_fn):
14469 """Assign nodes to a new group.
14472 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14474 self.cfg.AssignGroupNodes(mods)
14476   @staticmethod
14477   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14478 """Check for split instances after a node assignment.
14480 This method considers a series of node assignments as an atomic operation,
14481 and returns information about split instances after applying the set of
14484 In particular, it returns information about newly split instances, and
14485 instances that were already split, and remain so after the change.
14487 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14490 @type changes: list of (node_name, new_group_uuid) pairs.
14491 @param changes: list of node assignments to consider.
14492 @param node_data: a dict with data for all nodes
14493 @param instance_data: a dict with all instances to consider
14494 @rtype: a two-tuple
14495 @return: a list of instances that were previously okay and result split as a
14496 consequence of this change, and a list of instances that were previously
14497       split and this change does not fix.
14499     """
14500     changed_nodes = dict((node, group) for node, group in changes
14501 if node_data[node].group != group)
14503 all_split_instances = set()
14504 previously_split_instances = set()
14506 def InstanceNodes(instance):
14507 return [instance.primary_node] + list(instance.secondary_nodes)
14509 for inst in instance_data.values():
14510       if inst.disk_template not in constants.DTS_INT_MIRROR:
14511         continue
14513 instance_nodes = InstanceNodes(inst)
14515 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14516 previously_split_instances.add(inst.name)
14518 if len(set(changed_nodes.get(node, node_data[node].group)
14519 for node in instance_nodes)) > 1:
14520 all_split_instances.add(inst.name)
14522 return (list(all_split_instances - previously_split_instances),
14523 list(previously_split_instances & all_split_instances))
14526 class _GroupQuery(_QueryBase):
14527 FIELDS = query.GROUP_FIELDS
14529 def ExpandNames(self, lu):
14530 lu.needed_locks = {}
14532 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14533 self._cluster = lu.cfg.GetClusterInfo()
14534 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14536 if not self.names:
14537 self.wanted = [name_to_uuid[name]
14538 for name in utils.NiceSort(name_to_uuid.keys())]
14539 else:
14540 # Accept names to be either names or UUIDs.
14541 missing = []
14542 self.wanted = []
14543 all_uuid = frozenset(self._all_groups.keys())
14545 for name in self.names:
14546 if name in all_uuid:
14547 self.wanted.append(name)
14548 elif name in name_to_uuid:
14549 self.wanted.append(name_to_uuid[name])
14550 else:
14551 missing.append(name)
14553 if missing:
14554 raise errors.OpPrereqError("Some groups do not exist: %s" %
14555 utils.CommaJoin(missing),
14556 errors.ECODE_NOENT)
14558 def DeclareLocks(self, lu, level):
14559 pass
14561 def _GetQueryData(self, lu):
14562 """Computes the list of node groups and their attributes.
14565 do_nodes = query.GQ_NODE in self.requested_data
14566 do_instances = query.GQ_INST in self.requested_data
14568 group_to_nodes = None
14569 group_to_instances = None
14571 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14572 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14573 # latter GetAllInstancesInfo() is not enough, for we have to go through
14574 # instance->node. Hence, we will need to process nodes even if we only need
14575 # instance information.
14576 if do_nodes or do_instances:
14577 all_nodes = lu.cfg.GetAllNodesInfo()
14578 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14579 node_to_group = {}
14581 for node in all_nodes.values():
14582 if node.group in group_to_nodes:
14583 group_to_nodes[node.group].append(node.name)
14584 node_to_group[node.name] = node.group
14586 if do_instances:
14587 all_instances = lu.cfg.GetAllInstancesInfo()
14588 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14590 for instance in all_instances.values():
14591 node = instance.primary_node
14592 if node in node_to_group:
14593 group_to_instances[node_to_group[node]].append(instance.name)
14595 if not do_nodes:
14596 # Do not pass on node information if it was not requested.
14597 group_to_nodes = None
14599 return query.GroupQueryData(self._cluster,
14600 [self._all_groups[uuid]
14601 for uuid in self.wanted],
14602 group_to_nodes, group_to_instances,
14603 query.GQ_DISKPARAMS in self.requested_data)
14606 class LUGroupQuery(NoHooksLU):
14607 """Logical unit for querying node groups.
14612 def CheckArguments(self):
14613 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14614 self.op.output_fields, False)
14616 def ExpandNames(self):
14617 self.gq.ExpandNames(self)
14619 def DeclareLocks(self, level):
14620 self.gq.DeclareLocks(self, level)
14622 def Exec(self, feedback_fn):
14623 return self.gq.OldStyleQuery(self)
14626 class LUGroupSetParams(LogicalUnit):
14627 """Modifies the parameters of a node group.
14630 HPATH = "group-modify"
14631 HTYPE = constants.HTYPE_GROUP
14634 def CheckArguments(self):
14635 all_changes = [
14636 self.op.ndparams,
14637 self.op.diskparams,
14638 self.op.alloc_policy,
14639 self.op.hv_state,
14640 self.op.disk_state,
14641 self.op.ipolicy,
14642 ]
14644 if all_changes.count(None) == len(all_changes):
14645 raise errors.OpPrereqError("Please pass at least one modification",
14646 errors.ECODE_INVAL)
14648 def ExpandNames(self):
14649 # This raises errors.OpPrereqError on its own:
14650 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14652 self.needed_locks = {
14653 locking.LEVEL_INSTANCE: [],
14654 locking.LEVEL_NODEGROUP: [self.group_uuid],
14657 self.share_locks[locking.LEVEL_INSTANCE] = 1
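# Instances are only inspected here (group membership and policy checks), never
# modified, so a shared lock is sufficient.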
14659 def DeclareLocks(self, level):
14660 if level == locking.LEVEL_INSTANCE:
14661 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14663 # Lock instances optimistically; this needs verification once the group lock has been acquired
14665 self.needed_locks[locking.LEVEL_INSTANCE] = \
14666 self.cfg.GetNodeGroupInstances(self.group_uuid)
14668 @staticmethod
14669 def _UpdateAndVerifyDiskParams(old, new):
14670 """Updates and verifies disk parameters.
14673 new_params = _GetUpdatedParams(old, new)
14674 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14675 return new_params
14677 def CheckPrereq(self):
14678 """Check prerequisites.
14681 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14683 # Check if locked instances are still correct
14684 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14686 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14687 cluster = self.cfg.GetClusterInfo()
14689 if self.group is None:
14690 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14691 (self.op.group_name, self.group_uuid))
14693 if self.op.ndparams:
14694 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14695 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14696 self.new_ndparams = new_ndparams
14698 if self.op.diskparams:
14699 diskparams = self.group.diskparams
14700 uavdp = self._UpdateAndVerifyDiskParams
14701 # For each disktemplate subdict update and verify the values
14702 new_diskparams = dict((dt,
14703 uavdp(diskparams.get(dt, {}),
14704 self.op.diskparams[dt]))
14705 for dt in constants.DISK_TEMPLATES
14706 if dt in self.op.diskparams)
14707 # Now that all subdicts of diskparams are ready, let's merge the actual
14708 # dict with all the updated subdicts
14709 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14710 try:
14711 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14712 except errors.OpPrereqError, err:
14713 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14714 errors.ECODE_INVAL)
14716 if self.op.hv_state:
14717 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14718 self.group.hv_state_static)
14720 if self.op.disk_state:
14721 self.new_disk_state = \
14722 _MergeAndVerifyDiskState(self.op.disk_state,
14723 self.group.disk_state_static)
14725 if self.op.ipolicy:
14726 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14730 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14731 inst_filter = lambda inst: inst.name in owned_instances
14732 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14733 gmi = ganeti.masterd.instance
14735 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14737 new_ipolicy, instances)
14740 self.LogWarning("After the ipolicy change the following instances"
14741 " violate them: %s",
14742 utils.CommaJoin(violations))
14744 def BuildHooksEnv(self):
14745 """Build hooks env.
14749 "GROUP_NAME": self.op.group_name,
14750 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14753 def BuildHooksNodes(self):
14754 """Build hooks nodes.
14757 mn = self.cfg.GetMasterNode()
14758 return ([mn], [mn])
14760 def Exec(self, feedback_fn):
14761 """Modifies the node group.
14764 result = []
14766 if self.op.ndparams:
14767 self.group.ndparams = self.new_ndparams
14768 result.append(("ndparams", str(self.group.ndparams)))
14770 if self.op.diskparams:
14771 self.group.diskparams = self.new_diskparams
14772 result.append(("diskparams", str(self.group.diskparams)))
14774 if self.op.alloc_policy:
14775 self.group.alloc_policy = self.op.alloc_policy
14777 if self.op.hv_state:
14778 self.group.hv_state_static = self.new_hv_state
14780 if self.op.disk_state:
14781 self.group.disk_state_static = self.new_disk_state
14783 if self.op.ipolicy:
14784 self.group.ipolicy = self.new_ipolicy
14786 self.cfg.Update(self.group, feedback_fn)
14788 return result
14790 class LUGroupRemove(LogicalUnit):
14791 HPATH = "group-remove"
14792 HTYPE = constants.HTYPE_GROUP
14795 def ExpandNames(self):
14796 # This raises errors.OpPrereqError on its own:
14797 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14798 self.needed_locks = {
14799 locking.LEVEL_NODEGROUP: [self.group_uuid],
14802 def CheckPrereq(self):
14803 """Check prerequisites.
14805 This checks that the given group name exists as a node group, that it is
14806 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
14810 # Verify that the group is empty.
14811 group_nodes = [node.name
14812 for node in self.cfg.GetAllNodesInfo().values()
14813 if node.group == self.group_uuid]
14815 if group_nodes:
14816 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14817 " nodes: %s" %
14818 (self.op.group_name,
14819 utils.CommaJoin(utils.NiceSort(group_nodes))),
14820 errors.ECODE_STATE)
14822 # Verify the cluster would not be left group-less.
14823 if len(self.cfg.GetNodeGroupList()) == 1:
14824 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14825 " removed" % self.op.group_name,
14826 errors.ECODE_STATE)
14828 def BuildHooksEnv(self):
14829 """Build hooks env.
14833 "GROUP_NAME": self.op.group_name,
14836 def BuildHooksNodes(self):
14837 """Build hooks nodes.
14840 mn = self.cfg.GetMasterNode()
14841 return ([mn], [mn])
14843 def Exec(self, feedback_fn):
14844 """Remove the node group.
14848 self.cfg.RemoveNodeGroup(self.group_uuid)
14849 except errors.ConfigurationError:
14850 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14851 (self.op.group_name, self.group_uuid))
14853 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14856 class LUGroupRename(LogicalUnit):
14857 HPATH = "group-rename"
14858 HTYPE = constants.HTYPE_GROUP
14861 def ExpandNames(self):
14862 # This raises errors.OpPrereqError on its own:
14863 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14865 self.needed_locks = {
14866 locking.LEVEL_NODEGROUP: [self.group_uuid],
14869 def CheckPrereq(self):
14870 """Check prerequisites.
14872 Ensures requested new name is not yet used.
14876 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14877 except errors.OpPrereqError:
14880 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14881 " node group (UUID: %s)" %
14882 (self.op.new_name, new_name_uuid),
14883 errors.ECODE_EXISTS)
14885 def BuildHooksEnv(self):
14886 """Build hooks env.
14890 "OLD_NAME": self.op.group_name,
14891 "NEW_NAME": self.op.new_name,
14894 def BuildHooksNodes(self):
14895 """Build hooks nodes.
14898 mn = self.cfg.GetMasterNode()
14900 all_nodes = self.cfg.GetAllNodesInfo()
14901 all_nodes.pop(mn, None)
14903 run_nodes = [mn]
14904 run_nodes.extend(node.name for node in all_nodes.values()
14905 if node.group == self.group_uuid)
14907 return (run_nodes, run_nodes)
14909 def Exec(self, feedback_fn):
14910 """Rename the node group.
14913 group = self.cfg.GetNodeGroup(self.group_uuid)
14915 if group is None:
14916 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14917 (self.op.group_name, self.group_uuid))
14919 group.name = self.op.new_name
14920 self.cfg.Update(group, feedback_fn)
14922 return self.op.new_name
14925 class LUGroupEvacuate(LogicalUnit):
14926 HPATH = "group-evacuate"
14927 HTYPE = constants.HTYPE_GROUP
14930 def ExpandNames(self):
14931 # This raises errors.OpPrereqError on its own:
14932 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14934 if self.op.target_groups:
14935 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14936 self.op.target_groups)
14937 else:
14938 self.req_target_uuids = []
14940 if self.group_uuid in self.req_target_uuids:
14941 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14942 " as a target group (targets are %s)" %
14944 utils.CommaJoin(self.req_target_uuids)),
14945 errors.ECODE_INVAL)
14947 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
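# All locks are acquired in shared mode; the evacuation itself is carried out by
# the jobs returned from Exec, not by this LU directly.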
14949 self.share_locks = _ShareAll()
14950 self.needed_locks = {
14951 locking.LEVEL_INSTANCE: [],
14952 locking.LEVEL_NODEGROUP: [],
14953 locking.LEVEL_NODE: [],
14956 def DeclareLocks(self, level):
14957 if level == locking.LEVEL_INSTANCE:
14958 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14960 # Lock instances optimistically, needs verification once node and group
14961 # locks have been acquired
14962 self.needed_locks[locking.LEVEL_INSTANCE] = \
14963 self.cfg.GetNodeGroupInstances(self.group_uuid)
14965 elif level == locking.LEVEL_NODEGROUP:
14966 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14968 if self.req_target_uuids:
14969 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14971 # Lock all groups used by instances optimistically; this requires going
14972 # via the node before it's locked, requiring verification later on
14973 lock_groups.update(group_uuid
14974 for instance_name in
14975 self.owned_locks(locking.LEVEL_INSTANCE)
14976 for group_uuid in
14977 self.cfg.GetInstanceNodeGroups(instance_name))
14978 else:
14979 # No target groups, need to lock all of them
14980 lock_groups = locking.ALL_SET
14982 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14984 elif level == locking.LEVEL_NODE:
14985 # This will only lock the nodes in the group to be evacuated which
14986 # contain actual instances
14987 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
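# With LOCKS_APPEND, _LockInstancesNodes adds the instances' nodes to the existing
# lock list instead of replacing it; the remaining group member nodes are appended below.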
14988 self._LockInstancesNodes()
14990 # Lock all nodes in group to be evacuated and target groups
14991 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14992 assert self.group_uuid in owned_groups
14993 member_nodes = [node_name
14994 for group in owned_groups
14995 for node_name in self.cfg.GetNodeGroup(group).members]
14996 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14998 def CheckPrereq(self):
14999 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15000 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15001 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15003 assert owned_groups.issuperset(self.req_target_uuids)
15004 assert self.group_uuid in owned_groups
15006 # Check if locked instances are still correct
15007 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15009 # Get instance information
15010 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15012 # Check if node groups for locked instances are still correct
15013 _CheckInstancesNodeGroups(self.cfg, self.instances,
15014 owned_groups, owned_nodes, self.group_uuid)
15016 if self.req_target_uuids:
15017 # User requested specific target groups
15018 self.target_uuids = self.req_target_uuids
15020 # All groups except the one to be evacuated are potential targets
15021 self.target_uuids = [group_uuid for group_uuid in owned_groups
15022 if group_uuid != self.group_uuid]
15024 if not self.target_uuids:
15025 raise errors.OpPrereqError("There are no possible target groups",
15026 errors.ECODE_INVAL)
15028 def BuildHooksEnv(self):
15029 """Build hooks env.
15033 "GROUP_NAME": self.op.group_name,
15034 "TARGET_GROUPS": " ".join(self.target_uuids),
15037 def BuildHooksNodes(self):
15038 """Build hooks nodes.
15041 mn = self.cfg.GetMasterNode()
15043 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15045 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15047 return (run_nodes, run_nodes)
15049 def Exec(self, feedback_fn):
15050 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15052 assert self.group_uuid not in self.target_uuids
15054 req = iallocator.IAReqGroupChange(instances=instances,
15055 target_groups=self.target_uuids)
15056 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15058 ial.Run(self.op.iallocator)
15060 if not ial.success:
15061 raise errors.OpPrereqError("Can't compute group evacuation using"
15062 " iallocator '%s': %s" %
15063 (self.op.iallocator, ial.info),
15064 errors.ECODE_NORES)
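# Convert the iallocator's result into job definitions (lists of opcodes); they are
# returned via ResultWithJobs below rather than executed directly by this LU.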
15066 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15068 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15069 len(jobs), self.op.group_name)
15071 return ResultWithJobs(jobs)
15074 class TagsLU(NoHooksLU): # pylint: disable=W0223
15075 """Generic tags LU.
15077 This is an abstract class which is the parent of all the other tags LUs.
15080 def ExpandNames(self):
15081 self.group_uuid = None
15082 self.needed_locks = {}
15084 if self.op.kind == constants.TAG_NODE:
15085 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15086 lock_level = locking.LEVEL_NODE
15087 lock_name = self.op.name
15088 elif self.op.kind == constants.TAG_INSTANCE:
15089 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15090 lock_level = locking.LEVEL_INSTANCE
15091 lock_name = self.op.name
15092 elif self.op.kind == constants.TAG_NODEGROUP:
15093 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15094 lock_level = locking.LEVEL_NODEGROUP
15095 lock_name = self.group_uuid
15096 elif self.op.kind == constants.TAG_NETWORK:
15097 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15098 lock_level = locking.LEVEL_NETWORK
15099 lock_name = self.network_uuid
15100 else:
15101 lock_level = None
15102 lock_name = None
15104 if lock_level and getattr(self.op, "use_locking", True):
15105 self.needed_locks[lock_level] = lock_name
15107 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15108 # not possible to acquire the BGL based on opcode parameters)
15110 def CheckPrereq(self):
15111 """Check prerequisites.
15114 if self.op.kind == constants.TAG_CLUSTER:
15115 self.target = self.cfg.GetClusterInfo()
15116 elif self.op.kind == constants.TAG_NODE:
15117 self.target = self.cfg.GetNodeInfo(self.op.name)
15118 elif self.op.kind == constants.TAG_INSTANCE:
15119 self.target = self.cfg.GetInstanceInfo(self.op.name)
15120 elif self.op.kind == constants.TAG_NODEGROUP:
15121 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15122 elif self.op.kind == constants.TAG_NETWORK:
15123 self.target = self.cfg.GetNetwork(self.network_uuid)
15124 else:
15125 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15126 str(self.op.kind), errors.ECODE_INVAL)
15129 class LUTagsGet(TagsLU):
15130 """Returns the tags of a given object.
15135 def ExpandNames(self):
15136 TagsLU.ExpandNames(self)
15138 # Share locks as this is only a read operation
15139 self.share_locks = _ShareAll()
15141 def Exec(self, feedback_fn):
15142 """Returns the tag list.
15145 return list(self.target.GetTags())
15148 class LUTagsSearch(NoHooksLU):
15149 """Searches the tags for a given pattern.
15154 def ExpandNames(self):
15155 self.needed_locks = {}
15157 def CheckPrereq(self):
15158 """Check prerequisites.
15160 This checks the pattern passed for validity by compiling it.
15164 self.re = re.compile(self.op.pattern)
15165 except re.error, err:
15166 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15167 (self.op.pattern, err), errors.ECODE_INVAL)
15169 def Exec(self, feedback_fn):
15170 """Returns the tag list.
15173 cfg = self.cfg
15174 tgts = [("/cluster", cfg.GetClusterInfo())]
15175 ilist = cfg.GetAllInstancesInfo().values()
15176 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15177 nlist = cfg.GetAllNodesInfo().values()
15178 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15179 tgts.extend(("/nodegroup/%s" % n.name, n)
15180 for n in cfg.GetAllNodeGroupsInfo().values())
15181 results = []
15182 for path, target in tgts:
15183 for tag in target.GetTags():
15184 if self.re.search(tag):
15185 results.append((path, tag))
15187 return results
15189 class LUTagsSet(TagsLU):
15190 """Sets a tag on a given object.
15195 def CheckPrereq(self):
15196 """Check prerequisites.
15198 This checks the type and length of the tag name and value.
15201 TagsLU.CheckPrereq(self)
15202 for tag in self.op.tags:
15203 objects.TaggableObject.ValidateTag(tag)
15205 def Exec(self, feedback_fn):
15209 try:
15210 for tag in self.op.tags:
15211 self.target.AddTag(tag)
15212 except errors.TagError, err:
15213 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15214 self.cfg.Update(self.target, feedback_fn)
15217 class LUTagsDel(TagsLU):
15218 """Delete a list of tags from a given object.
15223 def CheckPrereq(self):
15224 """Check prerequisites.
15226 This checks that we have the given tag.
15229 TagsLU.CheckPrereq(self)
15230 for tag in self.op.tags:
15231 objects.TaggableObject.ValidateTag(tag)
15232 del_tags = frozenset(self.op.tags)
15233 cur_tags = self.target.GetTags()
15235 diff_tags = del_tags - cur_tags
15236 if diff_tags:
15237 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15238 raise errors.OpPrereqError("Tag(s) %s not found" %
15239 (utils.CommaJoin(diff_names), ),
15240 errors.ECODE_NOENT)
15242 def Exec(self, feedback_fn):
15243 """Remove the tag from the object.
15246 for tag in self.op.tags:
15247 self.target.RemoveTag(tag)
15248 self.cfg.Update(self.target, feedback_fn)
15251 class LUTestDelay(NoHooksLU):
15252 """Sleep for a specified amount of time.
15254 This LU sleeps on the master and/or nodes for a specified amount of time.
15260 def ExpandNames(self):
15261 """Expand names and set required locks.
15263 This expands the node list, if any.
15266 self.needed_locks = {}
15267 if self.op.on_nodes:
15268 # _GetWantedNodes can be used here, but is not always appropriate to use
15269 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15270 # more information.
15271 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15272 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15274 def _TestDelay(self):
15275 """Do the actual sleep.
15278 if self.op.on_master:
15279 if not utils.TestDelay(self.op.duration):
15280 raise errors.OpExecError("Error during master delay test")
15281 if self.op.on_nodes:
15282 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15283 for node, node_result in result.items():
15284 node_result.Raise("Failure during rpc call to node %s" % node)
15286 def Exec(self, feedback_fn):
15287 """Execute the test delay opcode, with the wanted repetitions.
15290 if self.op.repeat == 0:
15291 self._TestDelay()
15292 else:
15293 top_value = self.op.repeat - 1
15294 for i in range(self.op.repeat):
15295 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15296 self._TestDelay()
15299 class LURestrictedCommand(NoHooksLU):
15300 """Logical unit for executing restricted commands.
15305 def ExpandNames(self):
15307 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15309 self.needed_locks = {
15310 locking.LEVEL_NODE: self.op.nodes,
15312 self.share_locks = {
15313 locking.LEVEL_NODE: not self.op.use_locking,
15316 def CheckPrereq(self):
15317 """Check prerequisites.
15321 def Exec(self, feedback_fn):
15322 """Execute restricted command and return output.
15325 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15327 # Check if correct locks are held
15328 assert set(self.op.nodes).issubset(owned_nodes)
15330 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15332 result = []
15334 for node_name in self.op.nodes:
15335 nres = rpcres[node_name]
15336 if nres.fail_msg:
15337 msg = ("Command '%s' on node '%s' failed: %s" %
15338 (self.op.command, node_name, nres.fail_msg))
15339 result.append((False, msg))
15340 else:
15341 result.append((True, nres.payload))
15343 return result
15346 class LUTestJqueue(NoHooksLU):
15347 """Utility LU to test some aspects of the job queue.
15352 # Must be lower than default timeout for WaitForJobChange to see whether it
15353 # notices changed jobs
15354 _CLIENT_CONNECT_TIMEOUT = 20.0
15355 _CLIENT_CONFIRM_TIMEOUT = 60.0
15358 def _NotifyUsingSocket(cls, cb, errcls):
15359 """Opens a Unix socket and waits for another program to connect.
15362 @param cb: Callback to send socket name to client
15363 @type errcls: class
15364 @param errcls: Exception class to use for errors
15367 # Using a temporary directory as there's no easy way to create temporary
15368 # sockets without writing a custom loop around tempfile.mktemp and
15370 tmpdir = tempfile.mkdtemp()
15372 tmpsock = utils.PathJoin(tmpdir, "sock")
15374 logging.debug("Creating temporary socket at %s", tmpsock)
15375 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15380 # Send details to client
15383 # Wait for client to connect before continuing
15384 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15386 (conn, _) = sock.accept()
15387 except socket.error, err:
15388 raise errcls("Client didn't connect in time (%s)" % err)
15392 # Remove as soon as client is connected
15393 shutil.rmtree(tmpdir)
15395 # Wait for client to close
15398 # pylint: disable=E1101
15399 # Instance of '_socketobject' has no ... member
15400 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15402 except socket.error, err:
15403 raise errcls("Client failed to confirm notification (%s)" % err)
15407 def _SendNotification(self, test, arg, sockname):
15408 """Sends a notification to the client.
15411 @param test: Test name
15412 @param arg: Test argument (depends on test)
15413 @type sockname: string
15414 @param sockname: Socket path
15417 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15419 def _Notify(self, prereq, test, arg):
15420 """Notifies the client of a test.
15423 @param prereq: Whether this is a prereq-phase test
15425 @param test: Test name
15426 @param arg: Test argument (depends on test)
15429 if prereq:
15430 errcls = errors.OpPrereqError
15431 else:
15432 errcls = errors.OpExecError
15434 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15438 def CheckArguments(self):
15439 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15440 self.expandnames_calls = 0
15442 def ExpandNames(self):
15443 checkargs_calls = getattr(self, "checkargs_calls", 0)
15444 if checkargs_calls < 1:
15445 raise errors.ProgrammerError("CheckArguments was not called")
15447 self.expandnames_calls += 1
15449 if self.op.notify_waitlock:
15450 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15452 self.LogInfo("Expanding names")
15454 # Get lock on master node (just to get a lock, not for a particular reason)
15455 self.needed_locks = {
15456 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15459 def Exec(self, feedback_fn):
15460 if self.expandnames_calls < 1:
15461 raise errors.ProgrammerError("ExpandNames was not called")
15463 if self.op.notify_exec:
15464 self._Notify(False, constants.JQT_EXEC, None)
15466 self.LogInfo("Executing")
15468 if self.op.log_messages:
15469 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15470 for idx, msg in enumerate(self.op.log_messages):
15471 self.LogInfo("Sending log message %s", idx + 1)
15472 feedback_fn(constants.JQT_MSGPREFIX + msg)
15473 # Report how many test messages have been sent
15474 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15476 if self.op.fail:
15477 raise errors.OpExecError("Opcode failure was requested")
15482 class LUTestAllocator(NoHooksLU):
15483 """Run allocator tests.
15485 This LU runs the allocator tests
15488 def CheckPrereq(self):
15489 """Check prerequisites.
15491 This checks the opcode parameters depending on the director and mode test.
15494 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15495 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15496 for attr in ["memory", "disks", "disk_template",
15497 "os", "tags", "nics", "vcpus"]:
15498 if not hasattr(self.op, attr):
15499 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15500 attr, errors.ECODE_INVAL)
15501 iname = self.cfg.ExpandInstanceName(self.op.name)
15502 if iname is not None:
15503 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15504 iname, errors.ECODE_EXISTS)
15505 if not isinstance(self.op.nics, list):
15506 raise errors.OpPrereqError("Invalid parameter 'nics'",
15507 errors.ECODE_INVAL)
15508 if not isinstance(self.op.disks, list):
15509 raise errors.OpPrereqError("Invalid parameter 'disks'",
15510 errors.ECODE_INVAL)
15511 for row in self.op.disks:
15512 if (not isinstance(row, dict) or
15513 constants.IDISK_SIZE not in row or
15514 not isinstance(row[constants.IDISK_SIZE], int) or
15515 constants.IDISK_MODE not in row or
15516 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15517 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15518 " parameter", errors.ECODE_INVAL)
15519 if self.op.hypervisor is None:
15520 self.op.hypervisor = self.cfg.GetHypervisorType()
15521 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15522 fname = _ExpandInstanceName(self.cfg, self.op.name)
15523 self.op.name = fname
15524 self.relocate_from = \
15525 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15526 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15527 constants.IALLOCATOR_MODE_NODE_EVAC):
15528 if not self.op.instances:
15529 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15530 self.op.instances = _GetWantedInstances(self, self.op.instances)
15531 else:
15532 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15533 self.op.mode, errors.ECODE_INVAL)
15535 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15536 if self.op.allocator is None:
15537 raise errors.OpPrereqError("Missing allocator name",
15538 errors.ECODE_INVAL)
15539 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15540 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15541 self.op.direction, errors.ECODE_INVAL)
15543 def Exec(self, feedback_fn):
15544 """Run the allocator test.
15547 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15548 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15549 memory=self.op.memory,
15550 disks=self.op.disks,
15551 disk_template=self.op.disk_template,
15552 os=self.op.os,
15553 tags=self.op.tags,
15554 nics=self.op.nics,
15555 vcpus=self.op.vcpus,
15556 spindle_use=self.op.spindle_use,
15557 hypervisor=self.op.hypervisor)
15558 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15559 req = iallocator.IAReqRelocate(name=self.op.name,
15560 relocate_from=list(self.relocate_from))
15561 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15562 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15563 target_groups=self.op.target_groups)
15564 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15565 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15566 evac_mode=self.op.evac_mode)
15567 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15568 disk_template = self.op.disk_template
15569 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15570 memory=self.op.memory,
15571 disks=self.op.disks,
15572 disk_template=disk_template,
15573 os=self.op.os,
15574 tags=self.op.tags,
15575 nics=self.op.nics,
15576 vcpus=self.op.vcpus,
15577 spindle_use=self.op.spindle_use,
15578 hypervisor=self.op.hypervisor)
15579 for idx in range(self.op.count)]
15580 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15582 raise errors.ProgrammerError("Unhandled mode %s in"
15583 " LUTestAllocator.Exec", self.op.mode)
15585 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15586 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15587 result = ial.in_text
15588 else:
15589 ial.Run(self.op.allocator, validate=False)
15590 result = ial.out_text
15592 return result
15595 class LUNetworkAdd(LogicalUnit):
15596 """Logical unit for creating networks.
15599 HPATH = "network-add"
15600 HTYPE = constants.HTYPE_NETWORK
15603 def BuildHooksNodes(self):
15604 """Build hooks nodes.
15607 mn = self.cfg.GetMasterNode()
15608 return ([mn], [mn])
15610 def ExpandNames(self):
15611 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15612 self.needed_locks = {}
15613 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
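# The network does not exist yet, so a fresh UUID is generated here and registered
# as the lock to be added for the new object.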
15615 def CheckPrereq(self):
15616 """Check prerequisites.
15618 This checks that the network (subnet) is given and that no network with the same name already exists.
15622 if self.op.network is None:
15623 raise errors.OpPrereqError("Network must be given",
15624 errors.ECODE_INVAL)
15626 uuid = self.cfg.LookupNetwork(self.op.network_name)
15628 if uuid:
15629 raise errors.OpPrereqError("Network '%s' already defined" %
15630 self.op.network_name, errors.ECODE_EXISTS)
15632 if self.op.mac_prefix:
15633 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15635 # Check tag validity
15636 for tag in self.op.tags:
15637 objects.TaggableObject.ValidateTag(tag)
15639 def BuildHooksEnv(self):
15640 """Build hooks env.
15643 args = {
15644 "name": self.op.network_name,
15645 "subnet": self.op.network,
15646 "gateway": self.op.gateway,
15647 "network6": self.op.network6,
15648 "gateway6": self.op.gateway6,
15649 "mac_prefix": self.op.mac_prefix,
15650 "network_type": self.op.network_type,
15651 "tags": self.op.tags,
15652 }
15653 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15655 def Exec(self, feedback_fn):
15656 """Add the ip pool to the cluster.
15659 nobj = objects.Network(name=self.op.network_name,
15660 network=self.op.network,
15661 gateway=self.op.gateway,
15662 network6=self.op.network6,
15663 gateway6=self.op.gateway6,
15664 mac_prefix=self.op.mac_prefix,
15665 network_type=self.op.network_type,
15666 uuid=self.network_uuid,
15668 # Initialize the associated address pool
15670 pool = network.AddressPool.InitializeNetwork(nobj)
15671 except errors.AddressPoolError, e:
15672 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15674 # Check if we need to reserve the nodes and the cluster master IP
15675 # These may not be allocated to any instances in routed mode, as
15676 # they wouldn't function anyway.
15677 for node in self.cfg.GetAllNodesInfo().values():
15678 for ip in [node.primary_ip, node.secondary_ip]:
15679 try:
15680 pool.Reserve(ip)
15681 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15683 except errors.AddressPoolError:
15684 pass
15686 master_ip = self.cfg.GetClusterInfo().master_ip
15687 try:
15688 pool.Reserve(master_ip)
15689 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15690 except errors.AddressPoolError:
15691 pass
15693 if self.op.add_reserved_ips:
15694 for ip in self.op.add_reserved_ips:
15696 pool.Reserve(ip, external=True)
15697 except errors.AddressPoolError, e:
15698 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15701 for tag in self.op.tags:
15702 nobj.AddTag(tag)
15704 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15705 del self.remove_locks[locking.LEVEL_NETWORK]
15708 class LUNetworkRemove(LogicalUnit):
15709 HPATH = "network-remove"
15710 HTYPE = constants.HTYPE_NETWORK
15713 def ExpandNames(self):
15714 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15716 if not self.network_uuid:
15717 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15718 errors.ECODE_INVAL)
15719 self.needed_locks = {
15720 locking.LEVEL_NETWORK: [self.network_uuid],
15723 def CheckPrereq(self):
15724 """Check prerequisites.
15726 This checks that the given network exists as a network and that it is
15727 not connected to any node group.
15732 # Verify that the network is not connected.
15733 node_groups = [group.name
15734 for group in self.cfg.GetAllNodeGroupsInfo().values()
15735 for net in group.networks.keys()
15736 if net == self.network_uuid]
15739 self.LogWarning("Network '%s' is connected to the following"
15740 " node groups: %s" % (self.op.network_name,
15741 utils.CommaJoin(utils.NiceSort(node_groups))))
15742 raise errors.OpPrereqError("Network still connected",
15743 errors.ECODE_STATE)
15745 def BuildHooksEnv(self):
15746 """Build hooks env.
15750 "NETWORK_NAME": self.op.network_name,
15753 def BuildHooksNodes(self):
15754 """Build hooks nodes.
15757 mn = self.cfg.GetMasterNode()
15758 return ([mn], [mn])
15760 def Exec(self, feedback_fn):
15761 """Remove the network.
15765 self.cfg.RemoveNetwork(self.network_uuid)
15766 except errors.ConfigurationError:
15767 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15768 (self.op.network_name, self.network_uuid))
15771 class LUNetworkSetParams(LogicalUnit):
15772 """Modifies the parameters of a network.
15775 HPATH = "network-modify"
15776 HTYPE = constants.HTYPE_NETWORK
15779 def CheckArguments(self):
15780 if (self.op.gateway and
15781 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15782 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15783 " at once", errors.ECODE_INVAL)
15785 def ExpandNames(self):
15786 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15787 self.network = self.cfg.GetNetwork(self.network_uuid)
15788 if self.network is None:
15789 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15790 (self.op.network_name, self.network_uuid),
15791 errors.ECODE_INVAL)
15792 self.needed_locks = {
15793 locking.LEVEL_NETWORK: [self.network_uuid],
15796 def CheckPrereq(self):
15797 """Check prerequisites.
15800 self.gateway = self.network.gateway
15801 self.network_type = self.network.network_type
15802 self.mac_prefix = self.network.mac_prefix
15803 self.network6 = self.network.network6
15804 self.gateway6 = self.network.gateway6
15805 self.tags = self.network.tags
15807 self.pool = network.AddressPool(self.network)
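# Wrap the network in an AddressPool so reservations can be checked here and
# actually reserved/released in Exec.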
15809 if self.op.gateway:
15810 if self.op.gateway == constants.VALUE_NONE:
15811 self.gateway = None
15813 self.gateway = self.op.gateway
15814 if self.pool.IsReserved(self.gateway):
15815 raise errors.OpPrereqError("%s is already reserved" %
15816 self.gateway, errors.ECODE_INVAL)
15818 if self.op.network_type:
15819 if self.op.network_type == constants.VALUE_NONE:
15820 self.network_type = None
15822 self.network_type = self.op.network_type
15824 if self.op.mac_prefix:
15825 if self.op.mac_prefix == constants.VALUE_NONE:
15826 self.mac_prefix = None
15828 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15829 self.mac_prefix = self.op.mac_prefix
15831 if self.op.gateway6:
15832 if self.op.gateway6 == constants.VALUE_NONE:
15833 self.gateway6 = None
15835 self.gateway6 = self.op.gateway6
15837 if self.op.network6:
15838 if self.op.network6 == constants.VALUE_NONE:
15839 self.network6 = None
15841 self.network6 = self.op.network6
15843 def BuildHooksEnv(self):
15844 """Build hooks env.
15847 args = {
15848 "name": self.op.network_name,
15849 "subnet": self.network.network,
15850 "gateway": self.gateway,
15851 "network6": self.network6,
15852 "gateway6": self.gateway6,
15853 "mac_prefix": self.mac_prefix,
15854 "network_type": self.network_type,
15855 }
15857 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15859 def BuildHooksNodes(self):
15860 """Build hooks nodes.
15863 mn = self.cfg.GetMasterNode()
15864 return ([mn], [mn])
15866 def Exec(self, feedback_fn):
15867 """Modifies the network.
15870 #TODO: reserve/release via temporary reservation manager
15871 # extend cfg.ReserveIp/ReleaseIp with the external flag
15872 if self.op.gateway:
15873 if self.gateway == self.network.gateway:
15874 self.LogWarning("Gateway is already %s", self.gateway)
15877 self.pool.Reserve(self.gateway, external=True)
15878 if self.network.gateway:
15879 self.pool.Release(self.network.gateway, external=True)
15880 self.network.gateway = self.gateway
15882 if self.op.add_reserved_ips:
15883 for ip in self.op.add_reserved_ips:
15885 if self.pool.IsReserved(ip):
15886 self.LogWarning("IP address %s is already reserved", ip)
15888 self.pool.Reserve(ip, external=True)
15889 except errors.AddressPoolError, err:
15890 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
15892 if self.op.remove_reserved_ips:
15893 for ip in self.op.remove_reserved_ips:
15894 if ip == self.network.gateway:
15895 self.LogWarning("Cannot unreserve Gateway's IP")
15898 if not self.pool.IsReserved(ip):
15899 self.LogWarning("IP address %s is already unreserved", ip)
15901 self.pool.Release(ip, external=True)
15902 except errors.AddressPoolError, err:
15903 self.LogWarning("Cannot release IP address %s: %s", ip, err)
15905 if self.op.mac_prefix:
15906 self.network.mac_prefix = self.mac_prefix
15908 if self.op.network6:
15909 self.network.network6 = self.network6
15911 if self.op.gateway6:
15912 self.network.gateway6 = self.gateway6
15914 if self.op.network_type:
15915 self.network.network_type = self.network_type
15917 self.pool.Validate()
15919 self.cfg.Update(self.network, feedback_fn)
15922 class _NetworkQuery(_QueryBase):
15923 FIELDS = query.NETWORK_FIELDS
15925 def ExpandNames(self, lu):
15926 lu.needed_locks = {}
15928 self._all_networks = lu.cfg.GetAllNetworksInfo()
15929 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15931 if not self.names:
15932 self.wanted = [name_to_uuid[name]
15933 for name in utils.NiceSort(name_to_uuid.keys())]
15934 else:
15935 # Accept names to be either names or UUIDs.
15936 missing = []
15937 self.wanted = []
15938 all_uuid = frozenset(self._all_networks.keys())
15940 for name in self.names:
15941 if name in all_uuid:
15942 self.wanted.append(name)
15943 elif name in name_to_uuid:
15944 self.wanted.append(name_to_uuid[name])
15945 else:
15946 missing.append(name)
15948 if missing:
15949 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15950 errors.ECODE_NOENT)
15952 def DeclareLocks(self, lu, level):
15953 pass
15955 def _GetQueryData(self, lu):
15956 """Computes the list of networks and their attributes.
15959 do_instances = query.NETQ_INST in self.requested_data
15960 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15961 do_stats = query.NETQ_STATS in self.requested_data
15963 network_to_groups = None
15964 network_to_instances = None
15967 # For NETQ_GROUP, we need to map network->[groups]
15969 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15970 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15973 all_instances = lu.cfg.GetAllInstancesInfo()
15974 all_nodes = lu.cfg.GetAllNodesInfo()
15975 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15977 for group in all_groups.values():
15979 group_nodes = [node.name for node in all_nodes.values() if
15980 node.group == group.uuid]
15981 group_instances = [instance for instance in all_instances.values()
15982 if instance.primary_node in group_nodes]
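# For every network connected to this group, record the group's NIC parameters
# and collect the instances whose NICs use that network.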
15984 for net_uuid in group.networks.keys():
15985 if net_uuid in network_to_groups:
15986 netparams = group.networks[net_uuid]
15987 mode = netparams[constants.NIC_MODE]
15988 link = netparams[constants.NIC_LINK]
15989 info = group.name + "(" + mode + ", " + link + ")"
15990 network_to_groups[net_uuid].append(info)
15993 for instance in group_instances:
15994 for nic in instance.nics:
15995 if nic.network == self._all_networks[net_uuid].name:
15996 network_to_instances[net_uuid].append(instance.name)
16001 for uuid, net in self._all_networks.items():
16002 if uuid in self.wanted:
16003 pool = network.AddressPool(net)
16005 "free_count": pool.GetFreeCount(),
16006 "reserved_count": pool.GetReservedCount(),
16007 "map": pool.GetMap(),
16008 "external_reservations":
16009 utils.CommaJoin(pool.GetExternalReservations()),
16012 return query.NetworkQueryData([self._all_networks[uuid]
16013 for uuid in self.wanted],
16015 network_to_instances,
16019 class LUNetworkQuery(NoHooksLU):
16020 """Logical unit for querying networks.
16025 def CheckArguments(self):
16026 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16027 self.op.output_fields, False)
16029 def ExpandNames(self):
16030 self.nq.ExpandNames(self)
16032 def Exec(self, feedback_fn):
16033 return self.nq.OldStyleQuery(self)
16036 class LUNetworkConnect(LogicalUnit):
16037 """Connect a network to a nodegroup
16040 HPATH = "network-connect"
16041 HTYPE = constants.HTYPE_NETWORK
16044 def ExpandNames(self):
16045 self.network_name = self.op.network_name
16046 self.group_name = self.op.group_name
16047 self.network_mode = self.op.network_mode
16048 self.network_link = self.op.network_link
16050 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16051 self.network = self.cfg.GetNetwork(self.network_uuid)
16052 if self.network is None:
16053 raise errors.OpPrereqError("Network %s does not exist" %
16054 self.network_name, errors.ECODE_INVAL)
16056 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16057 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16058 if self.group is None:
16059 raise errors.OpPrereqError("Group %s does not exist" %
16060 self.group_name, errors.ECODE_INVAL)
16062 self.needed_locks = {
16063 locking.LEVEL_INSTANCE: [],
16064 locking.LEVEL_NODEGROUP: [self.group_uuid],
16066 self.share_locks[locking.LEVEL_INSTANCE] = 1
16068 def DeclareLocks(self, level):
16069 if level == locking.LEVEL_INSTANCE:
16070 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16072 # Lock instances optimistically; this needs verification once the group lock has been acquired
16074 self.needed_locks[locking.LEVEL_INSTANCE] = \
16075 self.cfg.GetNodeGroupInstances(self.group_uuid)
16077 def BuildHooksEnv(self):
16078 ret = {
16079 "GROUP_NAME": self.group_name,
16080 "GROUP_NETWORK_MODE": self.network_mode,
16081 "GROUP_NETWORK_LINK": self.network_link,
16082 }
16083 ret.update(_BuildNetworkHookEnvByObject(self.network))
16085 return ret
16086 def BuildHooksNodes(self):
16087 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16088 return (nodes, nodes)
16090 def CheckPrereq(self):
16091 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16092 for i in value)
16094 self.netparams = {
16095 constants.NIC_MODE: self.network_mode,
16096 constants.NIC_LINK: self.network_link,
16097 }
16098 objects.NIC.CheckParameterSyntax(self.netparams)
16100 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16101 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16102 self.connected = False
16103 if self.network_uuid in self.group.networks:
16104 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16105 (self.network_name, self.group.name))
16106 self.connected = True
16109 pool = network.AddressPool(self.network)
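# When conflicts_check is set, look for NICs in this group whose IPs fall inside
# the network being connected but are not yet associated with any network.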
16110 if self.op.conflicts_check:
16111 groupinstances = []
16112 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16113 groupinstances.append(self.cfg.GetInstanceInfo(n))
16114 instances = [(instance.name, idx, nic.ip)
16115 for instance in groupinstances
16116 for idx, nic in enumerate(instance.nics)
16117 if (not nic.network and pool.Contains(nic.ip))]
16118 if instances:
16119 self.LogWarning("The following occurrences use IPs from network %s"
16120 " that is about to be connected to nodegroup %s: %s" %
16121 (self.network_name, self.group.name,
16122 l(instances)))
16123 raise errors.OpPrereqError("Conflicting IPs found."
16124 " Please remove/modify"
16125 " corresponding NICs",
16126 errors.ECODE_INVAL)
16128 def Exec(self, feedback_fn):
16129 if self.connected:
16130 return
16132 self.group.networks[self.network_uuid] = self.netparams
16133 self.cfg.Update(self.group, feedback_fn)
16136 class LUNetworkDisconnect(LogicalUnit):
16137 """Disconnect a network to a nodegroup
16140 HPATH = "network-disconnect"
16141 HTYPE = constants.HTYPE_NETWORK
16144 def ExpandNames(self):
16145 self.network_name = self.op.network_name
16146 self.group_name = self.op.group_name
16148 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16149 self.network = self.cfg.GetNetwork(self.network_uuid)
16150 if self.network is None:
16151 raise errors.OpPrereqError("Network %s does not exist" %
16152 self.network_name, errors.ECODE_INVAL)
16154 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16155 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16156 if self.group is None:
16157 raise errors.OpPrereqError("Group %s does not exist" %
16158 self.group_name, errors.ECODE_INVAL)
16160 self.needed_locks = {
16161 locking.LEVEL_INSTANCE: [],
16162 locking.LEVEL_NODEGROUP: [self.group_uuid],
16164 self.share_locks[locking.LEVEL_INSTANCE] = 1
16166 def DeclareLocks(self, level):
16167 if level == locking.LEVEL_INSTANCE:
16168 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16170 # Lock instances optimistically; this needs verification once the group lock has been acquired
16172 self.needed_locks[locking.LEVEL_INSTANCE] = \
16173 self.cfg.GetNodeGroupInstances(self.group_uuid)
16175 def BuildHooksEnv(self):
16176 ret = {
16177 "GROUP_NAME": self.group_name,
16178 }
16179 ret.update(_BuildNetworkHookEnvByObject(self.network))
16181 return ret
16182 def BuildHooksNodes(self):
16183 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16184 return (nodes, nodes)
16186 def CheckPrereq(self):
16187 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16188 for i in value)
16190 self.connected = True
16191 if self.network_uuid not in self.group.networks:
16192 self.LogWarning("Network '%s' is not mapped to group '%s'",
16193 self.network_name, self.group.name)
16194 self.connected = False
16197 if self.op.conflicts_check:
16198 groupinstances = []
16199 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16200 groupinstances.append(self.cfg.GetInstanceInfo(n))
16201 instances = [(instance.name, idx, nic.ip)
16202 for instance in groupinstances
16203 for idx, nic in enumerate(instance.nics)
16204 if nic.network == self.network_name]
16205 if instances:
16206 self.LogWarning("The following occurrences use IPs from network %s"
16207 " that is about to be disconnected from the nodegroup"
16208 " %s: %s" %
16209 (self.network_name, self.group.name,
16210 l(instances)))
16211 raise errors.OpPrereqError("Conflicting IPs."
16212 " Please remove/modify"
16213 " corresponding NICs",
16214 errors.ECODE_INVAL)
16216 def Exec(self, feedback_fn):
16217 if not self.connected:
16218 return
16220 del self.group.networks[self.network_uuid]
16221 self.cfg.Update(self.group, feedback_fn)
16224 #: Query type implementations
16225 _QUERY_IMPL = {
16226 constants.QR_CLUSTER: _ClusterQuery,
16227 constants.QR_INSTANCE: _InstanceQuery,
16228 constants.QR_NODE: _NodeQuery,
16229 constants.QR_GROUP: _GroupQuery,
16230 constants.QR_NETWORK: _NetworkQuery,
16231 constants.QR_OS: _OsQuery,
16232 constants.QR_EXPORT: _ExportQuery,
16233 }
16235 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16238 def _GetQueryImplementation(name):
16239 """Returns the implementation for a query type.
16241 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16244 try:
16245 return _QUERY_IMPL[name]
16246 except KeyError:
16247 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16248 errors.ECODE_INVAL)
16251 def _CheckForConflictingIp(lu, ip, node):
16252 """In case of conflicting ip raise error.
16255 @param ip: ip address
16257 @param node: node name
16260 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16261 if conf_net is not None:
16262 raise errors.OpPrereqError("Conflicting IP found:"
16263 " %s <> %s." % (ip, conf_net),
16264 errors.ECODE_INVAL)
16266 return (None, None)