4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode result.
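  A typical use from an LU's C{Exec} might look like the following sketch (the
  opcode and the extra keyword are purely illustrative)::

    return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                          extra_result="value")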
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
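  A minimal concrete subclass might look like the following sketch (the class
  name, hook path and returned values are purely illustrative)::

    class LUExampleNoop(LogicalUnit):
      HPATH = "example-noop"
      HTYPE = constants.HTYPE_CLUSTER

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        return ([], [self.cfg.GetMasterNode()])

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Nothing to do")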
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
180 The function is allowed to change the self.op attribute so that
181 later methods no longer need to worry about missing parameters.
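    A sketch of such a check (C{self.op.force} stands in for any optional
    opcode parameter that should get a default value)::

      def CheckArguments(self):
        if self.op.force is None:
          self.op.force = False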
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names as values. Rules:
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
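      # Acquire all node locks in shared mode (a sketch; whether sharing is
      # appropriate depends on what the LU does with the nodes)
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
      }
      self.share_locks[locking.LEVEL_NODE] = 1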
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same time.
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
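    A sketch of a typical implementation, locking the nodes of the instances
    that were locked at the previous level::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()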
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused-argument and
345 # could-be-a-function pylint warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done before.
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _AnnotateDiskParams(instance, devs, cfg):
589 """Little helper wrapper to the rpc annotation method.
591 @param instance: The instance object
592 @type devs: List of L{objects.Disk}
593 @param devs: The root devices (not any of its children!)
594 @param cfg: The config object
595 @return: The annotated disk copies
596 @see L{rpc.AnnotateDiskParams}
599 return rpc.AnnotateDiskParams(instance.disk_template, devs,
600 cfg.GetInstanceDiskParams(instance))
603 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
605 """Checks if node groups for locked instances are still correct.
607 @type cfg: L{config.ConfigWriter}
608 @param cfg: Cluster configuration
609 @type instances: dict; string as key, L{objects.Instance} as value
610 @param instances: Dictionary, instance name as key, instance object as value
611 @type owned_groups: iterable of string
612 @param owned_groups: List of owned groups
613 @type owned_nodes: iterable of string
614 @param owned_nodes: List of owned nodes
615 @type cur_group_uuid: string or None
616 @param cur_group_uuid: Optional group UUID to check against instance's groups
619 for (name, inst) in instances.items():
620 assert owned_nodes.issuperset(inst.all_nodes), \
621 "Instance %s's nodes changed while we kept the lock" % name
623 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
625 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
626 "Instance %s has no node in group %s" % (name, cur_group_uuid)
629 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
631 """Checks if the owned node groups are still correct for an instance.
633 @type cfg: L{config.ConfigWriter}
634 @param cfg: The cluster configuration
635 @type instance_name: string
636 @param instance_name: Instance name
637 @type owned_groups: set or frozenset
638 @param owned_groups: List of currently owned node groups
639 @type primary_only: boolean
640 @param primary_only: Whether to check node groups for only the primary node
643 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
645 if not owned_groups.issuperset(inst_groups):
646 raise errors.OpPrereqError("Instance %s's node groups changed since"
647 " locks were acquired, current groups are"
648 " '%s', owning groups '%s'; retry the"
651 utils.CommaJoin(inst_groups),
652 utils.CommaJoin(owned_groups)),
658 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
659 """Checks if the instances in a node group are still correct.
661 @type cfg: L{config.ConfigWriter}
662 @param cfg: The cluster configuration
663 @type group_uuid: string
664 @param group_uuid: Node group UUID
665 @type owned_instances: set or frozenset
666 @param owned_instances: List of currently owned instances
669 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
670 if owned_instances != wanted_instances:
671 raise errors.OpPrereqError("Instances in node group '%s' changed since"
672 " locks were acquired, wanted '%s', have '%s';"
673 " retry the operation" %
675 utils.CommaJoin(wanted_instances),
676 utils.CommaJoin(owned_instances)),
679 return wanted_instances
682 def _SupportsOob(cfg, node):
683 """Tells if node supports OOB.
685 @type cfg: L{config.ConfigWriter}
686 @param cfg: The cluster configuration
687 @type node: L{objects.Node}
688 @param node: The node
689 @return: The OOB script if supported or an empty string otherwise
692 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
695 def _CopyLockList(names):
696 """Makes a copy of a list of lock names.
698 Handles L{locking.ALL_SET} correctly.
701 if names == locking.ALL_SET:
702 return locking.ALL_SET
707 def _GetWantedNodes(lu, nodes):
708 """Returns list of checked and expanded node names.
710 @type lu: L{LogicalUnit}
711 @param lu: the logical unit on whose behalf we execute
713 @param nodes: list of node names or None for all nodes
715 @return: the list of nodes, sorted
716 @raise errors.ProgrammerError: if the nodes parameter is wrong type
720 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
722 return utils.NiceSort(lu.cfg.GetNodeList())
725 def _GetWantedInstances(lu, instances):
726 """Returns list of checked and expanded instance names.
728 @type lu: L{LogicalUnit}
729 @param lu: the logical unit on whose behalf we execute
730 @type instances: list
731 @param instances: list of instance names or None for all instances
733 @return: the list of instances, sorted
734 @raise errors.OpPrereqError: if the instances parameter is wrong type
735 @raise errors.OpPrereqError: if any of the passed instances is not found
739 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
741 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
745 def _GetUpdatedParams(old_params, update_dict,
746 use_default=True, use_none=False):
747 """Return the new version of a parameter dictionary.
749 @type old_params: dict
750 @param old_params: old parameters
751 @type update_dict: dict
752 @param update_dict: dict containing new parameter values, or
753 constants.VALUE_DEFAULT to reset the parameter to its default
755 @type use_default: boolean
756 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
757     values as 'to be deleted' values
758 @type use_none: boolean
759 @param use_none: whether to recognise C{None} values as 'to be deleted' values
762 @return: the new parameter dictionary
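  Example (illustrative only; with C{use_default=True} a value of
  C{constants.VALUE_DEFAULT} removes the key from the result)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"b": constants.VALUE_DEFAULT, "c": 3})
    # -> {"a": 1, "c": 3}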
765 params_copy = copy.deepcopy(old_params)
766 for key, val in update_dict.iteritems():
767 if ((use_default and val == constants.VALUE_DEFAULT) or
768 (use_none and val is None)):
774 params_copy[key] = val
778 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
779 """Return the new version of a instance policy.
781 @param group_policy: whether this policy applies to a group and thus
782 we should support removal of policy entries
785 use_none = use_default = group_policy
786 ipolicy = copy.deepcopy(old_ipolicy)
787 for key, value in new_ipolicy.items():
788 if key not in constants.IPOLICY_ALL_KEYS:
789 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
791 if key in constants.IPOLICY_ISPECS:
792 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
793 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
795 use_default=use_default)
797 if (not value or value == [constants.VALUE_DEFAULT] or
798 value == constants.VALUE_DEFAULT):
802 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
803 " on the cluster" % key,
806 if key in constants.IPOLICY_PARAMETERS:
807 # FIXME: we assume all such values are float
809 ipolicy[key] = float(value)
810 except (TypeError, ValueError), err:
811 raise errors.OpPrereqError("Invalid value for attribute"
812 " '%s': '%s', error: %s" %
813 (key, value, err), errors.ECODE_INVAL)
815 # FIXME: we assume all others are lists; this should be redone
817 ipolicy[key] = list(value)
819 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
820 except errors.ConfigurationError, err:
821 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
826 def _UpdateAndVerifySubDict(base, updates, type_check):
827 """Updates and verifies a dict with sub dicts of the same type.
829 @param base: The dict with the old data
830 @param updates: The dict with the new data
831 @param type_check: Dict suitable to ForceDictType to verify correct types
832 @returns: A new dict with updated and verified values
836 new = _GetUpdatedParams(old, value)
837 utils.ForceDictType(new, type_check)
840 ret = copy.deepcopy(base)
841 ret.update(dict((key, fn(base.get(key, {}), value))
842 for key, value in updates.items()))
846 def _MergeAndVerifyHvState(op_input, obj_input):
847 """Combines the hv state from an opcode with the one of the object
849 @param op_input: The input dict from the opcode
850 @param obj_input: The input dict from the objects
851 @return: The verified and updated dict
855 invalid_hvs = set(op_input) - constants.HYPER_TYPES
857 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
858 " %s" % utils.CommaJoin(invalid_hvs),
860 if obj_input is None:
862 type_check = constants.HVSTS_PARAMETER_TYPES
863 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
868 def _MergeAndVerifyDiskState(op_input, obj_input):
869 """Combines the disk state from an opcode with the one of the object
871 @param op_input: The input dict from the opcode
872 @param obj_input: The input dict from the objects
873 @return: The verified and updated dict
876 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
878 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
879 utils.CommaJoin(invalid_dst),
881 type_check = constants.DSS_PARAMETER_TYPES
882 if obj_input is None:
884 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
886 for key, value in op_input.items())
891 def _ReleaseLocks(lu, level, names=None, keep=None):
892 """Releases locks owned by an LU.
894 @type lu: L{LogicalUnit}
895 @param level: Lock level
896 @type names: list or None
897 @param names: Names of locks to release
898 @type keep: list or None
899 @param keep: Names of locks to retain
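  Example (a sketch; C{keep_nodes} stands for whatever node names the caller
  still needs)::

    # Release all node locks except the ones we still need
    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=keep_nodes)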
902 assert not (keep is not None and names is not None), \
903 "Only one of the 'names' and the 'keep' parameters can be given"
905 if names is not None:
906 should_release = names.__contains__
908 should_release = lambda name: name not in keep
910 should_release = None
912 owned = lu.owned_locks(level)
914 # Not owning any lock at this level, do nothing
921 # Determine which locks to release
923 if should_release(name):
928 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
930 # Release just some locks
931 lu.glm.release(level, names=release)
933 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
936 lu.glm.release(level)
938 assert not lu.glm.is_owned(level), "No locks should be owned"
941 def _MapInstanceDisksToNodes(instances):
942 """Creates a map from (node, volume) to instance name.
944 @type instances: list of L{objects.Instance}
945 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
948 return dict(((node, vol), inst.name)
949 for inst in instances
950 for (node, vols) in inst.MapLVsByNode().items()
954 def _RunPostHook(lu, node_name):
955 """Runs the post-hook for an opcode on a single node.
958 hm = lu.proc.BuildHooksManager(lu)
960 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 except Exception, err: # pylint: disable=W0703
962 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of global parameters.
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 if not lu.cfg.GetNodeInfo(pnode).offline:
1112 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1113 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1114 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1119 lu.LogWarning("Primary node offline, ignoring check that instance"
1123 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1124 """Computes if value is in the desired range.
1126 @param name: name of the parameter for which we perform the check
1127 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1129 @param ipolicy: dictionary containing min, max and std values
1130 @param value: actual value that we want to use
1131 @return: None or element not meeting the criteria
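  Example (illustrative; assumes an ipolicy whose min/max for
  C{constants.ISPEC_MEM_SIZE} are 128 and 32768)::

    _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 64)
    # -> an error string saying the value 64 is not in range [128, 32768]
    _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 512)
    # -> None (the value is within the allowed range)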
1135 if value in [None, constants.VALUE_AUTO]:
1137 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1138 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1139 if value > max_v or min_v > value:
1141 fqn = "%s/%s" % (name, qualifier)
1144 return ("%s value %s is not in range [%s, %s]" %
1145 (fqn, value, min_v, max_v))
1149 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1150 nic_count, disk_sizes, spindle_use,
1151 _compute_fn=_ComputeMinMaxSpec):
1152 """Verifies ipolicy against provided specs.
1155 @param ipolicy: The ipolicy
1157 @param mem_size: The memory size
1158 @type cpu_count: int
1159 @param cpu_count: Used cpu cores
1160 @type disk_count: int
1161 @param disk_count: Number of disks used
1162 @type nic_count: int
1163 @param nic_count: Number of nics used
1164 @type disk_sizes: list of ints
1165 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1166 @type spindle_use: int
1167 @param spindle_use: The number of spindles this instance uses
1168 @param _compute_fn: The compute function (unittest only)
1169 @return: A list of violations, or an empty list if no violations are found
1172 assert disk_count == len(disk_sizes)
1175 (constants.ISPEC_MEM_SIZE, "", mem_size),
1176 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1177 (constants.ISPEC_DISK_COUNT, "", disk_count),
1178 (constants.ISPEC_NIC_COUNT, "", nic_count),
1179 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1180 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1181 for idx, d in enumerate(disk_sizes)]
1184 (_compute_fn(name, qualifier, ipolicy, value)
1185 for (name, qualifier, value) in test_settings))
1188 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1189 _compute_fn=_ComputeIPolicySpecViolation):
1190 """Compute if instance meets the specs of ipolicy.
1193 @param ipolicy: The ipolicy to verify against
1194 @type instance: L{objects.Instance}
1195 @param instance: The instance to verify
1196 @param _compute_fn: The function to verify ipolicy (unittest only)
1197 @see: L{_ComputeIPolicySpecViolation}
1200 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1201 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1202 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1203 disk_count = len(instance.disks)
1204 disk_sizes = [disk.size for disk in instance.disks]
1205 nic_count = len(instance.nics)
1207 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1208 disk_sizes, spindle_use)
1211 def _ComputeIPolicyInstanceSpecViolation(
1212 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1213 """Compute if instance specs meets the specs of ipolicy.
1216 @param ipolicy: The ipolicy to verify against
1217 @type instance_spec: dict
1218 @param instance_spec: The instance spec to verify
1219 @param _compute_fn: The function to verify ipolicy (unittest only)
1220 @see: L{_ComputeIPolicySpecViolation}
1223 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1224 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1225 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1226 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1227 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1228 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1230 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1231 disk_sizes, spindle_use)
1234 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1236 _compute_fn=_ComputeIPolicyInstanceViolation):
1237 """Compute if instance meets the specs of the new target group.
1239 @param ipolicy: The ipolicy to verify
1240 @param instance: The instance object to verify
1241 @param current_group: The current group of the instance
1242 @param target_group: The new group of the instance
1243 @param _compute_fn: The function to verify ipolicy (unittest only)
1244 @see: L{_ComputeIPolicySpecViolation}
1247 if current_group == target_group:
1250 return _compute_fn(ipolicy, instance)
1253 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1254 _compute_fn=_ComputeIPolicyNodeViolation):
1255 """Checks that the target node is correct in terms of instance policy.
1257 @param ipolicy: The ipolicy to verify
1258 @param instance: The instance object to verify
1259 @param node: The new node to relocate
1260 @param ignore: Ignore violations of the ipolicy
1261 @param _compute_fn: The function to verify ipolicy (unittest only)
1262 @see: L{_ComputeIPolicySpecViolation}
1265 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1266 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1269 msg = ("Instance does not meet target node group's (%s) instance"
1270 " policy: %s") % (node.group, utils.CommaJoin(res))
1274 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1277 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1278 """Computes a set of any instances that would violate the new ipolicy.
1280 @param old_ipolicy: The current (still in-place) ipolicy
1281 @param new_ipolicy: The new (to become) ipolicy
1282 @param instances: List of instances to verify
1283 @return: A list of instances which violate the new ipolicy but not the old one
1287 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1288 _ComputeViolatingInstances(old_ipolicy, instances))
1291 def _ExpandItemName(fn, name, kind):
1292 """Expand an item name.
1294 @param fn: the function to use for expansion
1295 @param name: requested item name
1296 @param kind: text description ('Node' or 'Instance')
1297 @return: the resolved (full) name
1298 @raise errors.OpPrereqError: if the item is not found
1301 full_name = fn(name)
1302 if full_name is None:
1303 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1308 def _ExpandNodeName(cfg, name):
1309 """Wrapper over L{_ExpandItemName} for nodes."""
1310 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1313 def _ExpandInstanceName(cfg, name):
1314 """Wrapper over L{_ExpandItemName} for instance."""
1315 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1318 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1319 network_type, mac_prefix, tags):
1320 """Builds network related env variables for hooks
1322 This builds the hook environment from individual variables.
1325 @param name: the name of the network
1326 @type subnet: string
1327 @param subnet: the ipv4 subnet
1328 @type gateway: string
1329 @param gateway: the ipv4 gateway
1330 @type network6: string
1331 @param network6: the ipv6 subnet
1332 @type gateway6: string
1333 @param gateway6: the ipv6 gateway
1334 @type network_type: string
1335 @param network_type: the type of the network
1336 @type mac_prefix: string
1337 @param mac_prefix: the mac_prefix
1339 @param tags: the tags of the network
1344 env["NETWORK_NAME"] = name
1346 env["NETWORK_SUBNET"] = subnet
1348 env["NETWORK_GATEWAY"] = gateway
1350 env["NETWORK_SUBNET6"] = network6
1352 env["NETWORK_GATEWAY6"] = gateway6
1354 env["NETWORK_MAC_PREFIX"] = mac_prefix
1356 env["NETWORK_TYPE"] = network_type
1358 env["NETWORK_TAGS"] = " ".join(tags)
1363 def _BuildNetworkHookEnvByObject(net):
1364 """Builds network related env varliables for hooks
1366 @type net: L{objects.Network}
1367 @param net: the network object
1372 "subnet": net.network,
1373 "gateway": net.gateway,
1374 "network6": net.network6,
1375 "gateway6": net.gateway6,
1376 "network_type": net.network_type,
1377 "mac_prefix": net.mac_prefix,
1381 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
1384 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1385 minmem, maxmem, vcpus, nics, disk_template, disks,
1386 bep, hvp, hypervisor_name, tags):
1387 """Builds instance related env variables for hooks
1389 This builds the hook environment from individual variables.
1392 @param name: the name of the instance
1393 @type primary_node: string
1394 @param primary_node: the name of the instance's primary node
1395 @type secondary_nodes: list
1396 @param secondary_nodes: list of secondary nodes as strings
1397 @type os_type: string
1398 @param os_type: the name of the instance's OS
1399 @type status: string
1400 @param status: the desired status of the instance
1401 @type minmem: string
1402 @param minmem: the minimum memory size of the instance
1403 @type maxmem: string
1404 @param maxmem: the maximum memory size of the instance
1406 @param vcpus: the count of VCPUs the instance has
1408 @param nics: list of tuples (ip, mac, mode, link, network) representing
1409 the NICs the instance has
1410 @type disk_template: string
1411 @param disk_template: the disk template of the instance
1413 @param disks: the list of (size, mode) pairs
1415 @param bep: the backend parameters for the instance
1417 @param hvp: the hypervisor parameters for the instance
1418 @type hypervisor_name: string
1419 @param hypervisor_name: the hypervisor for the instance
1421 @param tags: list of instance tags as strings
1423 @return: the hook environment for this instance
1428 "INSTANCE_NAME": name,
1429 "INSTANCE_PRIMARY": primary_node,
1430 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1431 "INSTANCE_OS_TYPE": os_type,
1432 "INSTANCE_STATUS": status,
1433 "INSTANCE_MINMEM": minmem,
1434 "INSTANCE_MAXMEM": maxmem,
1435 # TODO(2.7) remove deprecated "memory" value
1436 "INSTANCE_MEMORY": maxmem,
1437 "INSTANCE_VCPUS": vcpus,
1438 "INSTANCE_DISK_TEMPLATE": disk_template,
1439 "INSTANCE_HYPERVISOR": hypervisor_name,
1442 nic_count = len(nics)
1443 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1446 env["INSTANCE_NIC%d_IP" % idx] = ip
1447 env["INSTANCE_NIC%d_MAC" % idx] = mac
1448 env["INSTANCE_NIC%d_MODE" % idx] = mode
1449 env["INSTANCE_NIC%d_LINK" % idx] = link
1451 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1453 nobj = objects.Network.FromDict(netinfo)
1455 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1457 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1459 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1461 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1463 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1464 if nobj.network_type:
1465 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1467 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1468 if mode == constants.NIC_MODE_BRIDGED:
1469 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1473 env["INSTANCE_NIC_COUNT"] = nic_count
1476 disk_count = len(disks)
1477 for idx, (size, mode) in enumerate(disks):
1478 env["INSTANCE_DISK%d_SIZE" % idx] = size
1479 env["INSTANCE_DISK%d_MODE" % idx] = mode
1483 env["INSTANCE_DISK_COUNT"] = disk_count
1488 env["INSTANCE_TAGS"] = " ".join(tags)
1490 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1491 for key, value in source.items():
1492 env["INSTANCE_%s_%s" % (kind, key)] = value
1497 def _NICToTuple(lu, nic):
1498 """Build a tupple of nic information.
1500 @type lu: L{LogicalUnit}
1501 @param lu: the logical unit on whose behalf we execute
1502 @type nic: L{objects.NIC}
1503 @param nic: nic to convert to hooks tuple
1508 cluster = lu.cfg.GetClusterInfo()
1509 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1510 mode = filled_params[constants.NIC_MODE]
1511 link = filled_params[constants.NIC_LINK]
1515 net_uuid = lu.cfg.LookupNetwork(net)
1517 nobj = lu.cfg.GetNetwork(net_uuid)
1518 netinfo = objects.Network.ToDict(nobj)
1519 return (ip, mac, mode, link, net, netinfo)
1522 def _NICListToTuple(lu, nics):
1523 """Build a list of nic information tuples.
1525 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1526 value in LUInstanceQueryData.
1528 @type lu: L{LogicalUnit}
1529 @param lu: the logical unit on whose behalf we execute
1530 @type nics: list of L{objects.NIC}
1531 @param nics: list of nics to convert to hooks tuples
1536 hooks_nics.append(_NICToTuple(lu, nic))
1540 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1541 """Builds instance related env variables for hooks from an object.
1543 @type lu: L{LogicalUnit}
1544 @param lu: the logical unit on whose behalf we execute
1545 @type instance: L{objects.Instance}
1546 @param instance: the instance for which we should build the environment
1548 @type override: dict
1549 @param override: dictionary with key/values that will override
1552 @return: the hook environment dictionary
1555 cluster = lu.cfg.GetClusterInfo()
1556 bep = cluster.FillBE(instance)
1557 hvp = cluster.FillHV(instance)
1559 "name": instance.name,
1560 "primary_node": instance.primary_node,
1561 "secondary_nodes": instance.secondary_nodes,
1562 "os_type": instance.os,
1563 "status": instance.admin_state,
1564 "maxmem": bep[constants.BE_MAXMEM],
1565 "minmem": bep[constants.BE_MINMEM],
1566 "vcpus": bep[constants.BE_VCPUS],
1567 "nics": _NICListToTuple(lu, instance.nics),
1568 "disk_template": instance.disk_template,
1569 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1572 "hypervisor_name": instance.hypervisor,
1573 "tags": instance.tags,
1576 args.update(override)
1577 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1580 def _AdjustCandidatePool(lu, exceptions):
1581 """Adjust the candidate pool after node operations.
1584 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1586 lu.LogInfo("Promoted nodes to master candidate role: %s",
1587 utils.CommaJoin(node.name for node in mod_list))
1588 for name in mod_list:
1589 lu.context.ReaddNode(name)
1590 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1592 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1596 def _DecideSelfPromotion(lu, exceptions=None):
1597 """Decide whether I should promote myself as a master candidate.
1600 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1601 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1602 # the new node will increase mc_max by one, so:
1603 mc_should = min(mc_should + 1, cp_size)
1604 return mc_now < mc_should
1607 def _ComputeViolatingInstances(ipolicy, instances):
1608 """Computes a set of instances who violates given ipolicy.
1610 @param ipolicy: The ipolicy to verify
1611 @type instances: iterable of L{objects.Instance}
1612 @param instances: List of instances to verify
1613 @return: A frozenset of instance names violating the ipolicy
1616 return frozenset([inst.name for inst in instances
1617 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1620 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1621 """Check that the brigdes needed by a list of nics exist.
1624 cluster = lu.cfg.GetClusterInfo()
1625 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1626 brlist = [params[constants.NIC_LINK] for params in paramslist
1627 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1629 result = lu.rpc.call_bridges_exist(target_node, brlist)
1630 result.Raise("Error checking bridges on destination node '%s'" %
1631 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1634 def _CheckInstanceBridgesExist(lu, instance, node=None):
1635 """Check that the brigdes needed by an instance exist.
1639 node = instance.primary_node
1640 _CheckNicsBridgesExist(lu, instance.nics, node)
1643 def _CheckOSVariant(os_obj, name):
1644 """Check whether an OS name conforms to the os variants specification.
1646 @type os_obj: L{objects.OS}
1647 @param os_obj: OS object to check
1649 @param name: OS name passed by the user, to check for validity
1652 variant = objects.OS.GetVariant(name)
1653 if not os_obj.supported_variants:
1655 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1656 " passed)" % (os_obj.name, variant),
1660 raise errors.OpPrereqError("OS name must include a variant",
1663 if variant not in os_obj.supported_variants:
1664 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1667 def _GetNodeInstancesInner(cfg, fn):
1668 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1671 def _GetNodeInstances(cfg, node_name):
1672 """Returns a list of all primary and secondary instances on a node.
1676 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1679 def _GetNodePrimaryInstances(cfg, node_name):
1680 """Returns primary instances on a node.
1683 return _GetNodeInstancesInner(cfg,
1684 lambda inst: node_name == inst.primary_node)
1687 def _GetNodeSecondaryInstances(cfg, node_name):
1688 """Returns secondary instances on a node.
1691 return _GetNodeInstancesInner(cfg,
1692 lambda inst: node_name in inst.secondary_nodes)
1695 def _GetStorageTypeArgs(cfg, storage_type):
1696 """Returns the arguments for a storage type.
1699 # Special case for file storage
1700 if storage_type == constants.ST_FILE:
1701 # storage.FileStorage wants a list of storage directories
1702 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1707 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1710 for dev in instance.disks:
1711 cfg.SetDiskID(dev, node_name)
1713 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1715 result.Raise("Failed to get disk status from node %s" % node_name,
1716 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1718 for idx, bdev_status in enumerate(result.payload):
1719 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1725 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1726 """Check the sanity of iallocator and node arguments and use the
1727 cluster-wide iallocator if appropriate.
1729 Check that at most one of (iallocator, node) is specified. If none is
1730 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1731 then the LU's opcode's iallocator slot is filled with the cluster-wide default iallocator.
1734 @type iallocator_slot: string
1735 @param iallocator_slot: the name of the opcode iallocator slot
1736 @type node_slot: string
1737 @param node_slot: the name of the opcode target node slot
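  Example (a sketch of how an LU might call this from C{CheckArguments})::

    _CheckIAllocatorOrNode(self, "iallocator", "node")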
1740 node = getattr(lu.op, node_slot, None)
1741 ialloc = getattr(lu.op, iallocator_slot, None)
1745 if node is not None and ialloc is not None:
1746 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1748 elif ((node is None and ialloc is None) or
1749 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1750 default_iallocator = lu.cfg.GetDefaultIAllocator()
1751 if default_iallocator:
1752 setattr(lu.op, iallocator_slot, default_iallocator)
1754 raise errors.OpPrereqError("No iallocator or node given and no"
1755 " cluster-wide default iallocator found;"
1756 " please specify either an iallocator or a"
1757 " node, or set a cluster-wide default"
1758 " iallocator", errors.ECODE_INVAL)
1761 def _GetDefaultIAllocator(cfg, ialloc):
1762 """Decides on which iallocator to use.
1764 @type cfg: L{config.ConfigWriter}
1765 @param cfg: Cluster configuration object
1766 @type ialloc: string or None
1767 @param ialloc: Iallocator specified in opcode
1769 @return: Iallocator name
1773 # Use default iallocator
1774 ialloc = cfg.GetDefaultIAllocator()
1777 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1778 " opcode nor as a cluster-wide default",
1784 def _CheckHostnameSane(lu, name):
1785 """Ensures that a given hostname resolves to a 'sane' name.
1787 The given name is required to be a prefix of the resolved hostname,
1788 to prevent accidental mismatches.
1790 @param lu: the logical unit on behalf of which we're checking
1791 @param name: the name we should resolve and check
1792 @return: the resolved hostname object
1795 hostname = netutils.GetHostname(name=name)
1796 if hostname.name != name:
1797 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1798 if not utils.MatchNameComponent(name, [hostname.name]):
1799 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1800 " same as given hostname '%s'") %
1801 (hostname.name, name), errors.ECODE_INVAL)
1805 class LUClusterPostInit(LogicalUnit):
1806 """Logical unit for running hooks after cluster initialization.
1809 HPATH = "cluster-init"
1810 HTYPE = constants.HTYPE_CLUSTER
1812 def BuildHooksEnv(self):
1817 "OP_TARGET": self.cfg.GetClusterName(),
1820 def BuildHooksNodes(self):
1821 """Build hooks nodes.
1824 return ([], [self.cfg.GetMasterNode()])
1826 def Exec(self, feedback_fn):
1833 class LUClusterDestroy(LogicalUnit):
1834 """Logical unit for destroying the cluster.
1837 HPATH = "cluster-destroy"
1838 HTYPE = constants.HTYPE_CLUSTER
1840 def BuildHooksEnv(self):
1845 "OP_TARGET": self.cfg.GetClusterName(),
1848 def BuildHooksNodes(self):
1849 """Build hooks nodes.
1854 def CheckPrereq(self):
1855 """Check prerequisites.
1857 This checks whether the cluster is empty.
1859 Any errors are signaled by raising errors.OpPrereqError.
1862 master = self.cfg.GetMasterNode()
1864 nodelist = self.cfg.GetNodeList()
1865 if len(nodelist) != 1 or nodelist[0] != master:
1866 raise errors.OpPrereqError("There are still %d node(s) in"
1867 " this cluster." % (len(nodelist) - 1),
1869 instancelist = self.cfg.GetInstanceList()
1871 raise errors.OpPrereqError("There are still %d instance(s) in"
1872 " this cluster." % len(instancelist),
1875 def Exec(self, feedback_fn):
1876 """Destroys the cluster.
1879 master_params = self.cfg.GetMasterNetworkParameters()
1881 # Run post hooks on master node before it's removed
1882 _RunPostHook(self, master_params.name)
1884 ems = self.cfg.GetUseExternalMipScript()
1885 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1888 self.LogWarning("Error disabling the master IP address: %s",
1891 return master_params.name
1894 def _VerifyCertificate(filename):
1895 """Verifies a certificate for L{LUClusterVerifyConfig}.
1897 @type filename: string
1898 @param filename: Path to PEM file
1902 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1903 utils.ReadFile(filename))
1904 except Exception, err: # pylint: disable=W0703
1905 return (LUClusterVerifyConfig.ETYPE_ERROR,
1906 "Failed to load X509 certificate %s: %s" % (filename, err))
1909 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1910 constants.SSL_CERT_EXPIRATION_ERROR)
1913 fnamemsg = "While verifying %s: %s" % (filename, msg)
1918 return (None, fnamemsg)
1919 elif errcode == utils.CERT_WARNING:
1920 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1921 elif errcode == utils.CERT_ERROR:
1922 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1924 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1927 def _GetAllHypervisorParameters(cluster, instances):
1928 """Compute the set of all hypervisor parameters.
1930 @type cluster: L{objects.Cluster}
1931 @param cluster: the cluster object
1932 @param instances: list of L{objects.Instance}
1933 @param instances: additional instances from which to obtain parameters
1934 @rtype: list of (origin, hypervisor, parameters)
1935 @return: a list with all parameters found, indicating the hypervisor they
1936 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1941 for hv_name in cluster.enabled_hypervisors:
1942 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1944 for os_name, os_hvp in cluster.os_hvp.items():
1945 for hv_name, hv_params in os_hvp.items():
1947 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1948 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1950 # TODO: collapse identical parameter values in a single one
1951 for instance in instances:
1952 if instance.hvparams:
1953 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1954 cluster.FillHV(instance)))
1959 class _VerifyErrors(object):
1960 """Mix-in for cluster/group verify LUs.
1962 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1963 self.op and self._feedback_fn to be available.)
1967 ETYPE_FIELD = "code"
1968 ETYPE_ERROR = "ERROR"
1969 ETYPE_WARNING = "WARNING"
1971 def _Error(self, ecode, item, msg, *args, **kwargs):
1972 """Format an error message.
1974 Based on the opcode's error_codes parameter, either format a
1975 parseable error code, or a simpler error string.
1977 This must be called only from Exec and functions called from Exec.
1980 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1981 itype, etxt, _ = ecode
1982 # first complete the msg
1985 # then format the whole message
1986 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1987 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1993 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1994 # and finally report it via the feedback_fn
1995 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1997 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1998 """Log an error message if the passed condition is True.
2002 or self.op.debug_simulate_errors) # pylint: disable=E1101
2004 # If the error code is in the list of ignored errors, demote the error to a
2006 (_, etxt, _) = ecode
2007 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2008 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2011 self._Error(ecode, *args, **kwargs)
2013 # do not mark the operation as failed for WARN cases only
2014 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2015 self.bad = self.bad or cond
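  # Illustrative usage (added; condition and message are made up, but mirror
  # the calls made later in this module):
  #   self._ErrorIf(vg_size < min_size, constants.CV_ENODELVM, node,
  #                 "volume group is smaller than %d MiB", min_size)
  # Only ETYPE_ERROR results set self.bad; warnings, including errors demoted
  # through the opcode's ignore_errors list, are reported but do not fail the
  # operation.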
2018 class LUClusterVerify(NoHooksLU):
2019 """Submits all jobs necessary to verify the cluster.
2024 def ExpandNames(self):
2025 self.needed_locks = {}
2027 def Exec(self, feedback_fn):
2030 if self.op.group_name:
2031 groups = [self.op.group_name]
2032 depends_fn = lambda: None
2034 groups = self.cfg.GetNodeGroupList()
2036 # Verify global configuration
2038 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2041 # Always depend on global verification
2042 depends_fn = lambda: [(-len(jobs), [])]
2045 [opcodes.OpClusterVerifyGroup(group_name=group,
2046 ignore_errors=self.op.ignore_errors,
2047 depends=depends_fn())]
2048 for group in groups)
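    # Note added for clarity (not in the original source): at this point
    # "jobs" holds one single-opcode job per entry: the cluster-wide config
    # verification first (only when no specific group was requested),
    # followed by one OpClusterVerifyGroup job per group; in the full-cluster
    # case each group job depends on the config job via depends_fn().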
2050 # Fix up all parameters
2051 for op in itertools.chain(*jobs): # pylint: disable=W0142
2052 op.debug_simulate_errors = self.op.debug_simulate_errors
2053 op.verbose = self.op.verbose
2054 op.error_codes = self.op.error_codes
2056 op.skip_checks = self.op.skip_checks
2057 except AttributeError:
2058 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2060 return ResultWithJobs(jobs)
2063 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2064 """Verifies the cluster config.
2069 def _VerifyHVP(self, hvp_data):
2070 """Verifies locally the syntax of the hypervisor parameters.
2073 for item, hv_name, hv_params in hvp_data:
2074 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2077 hv_class = hypervisor.GetHypervisor(hv_name)
2078 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2079 hv_class.CheckParameterSyntax(hv_params)
2080 except errors.GenericError, err:
2081 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2083 def ExpandNames(self):
2084 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2085 self.share_locks = _ShareAll()
2087 def CheckPrereq(self):
2088 """Check prerequisites.
2091 # Retrieve all information
2092 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2093 self.all_node_info = self.cfg.GetAllNodesInfo()
2094 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2096 def Exec(self, feedback_fn):
2097 """Verify integrity of cluster, performing various tests on nodes.
2101 self._feedback_fn = feedback_fn
2103 feedback_fn("* Verifying cluster config")
2105 for msg in self.cfg.VerifyConfig():
2106 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2108 feedback_fn("* Verifying cluster certificate files")
2110 for cert_filename in pathutils.ALL_CERT_FILES:
2111 (errcode, msg) = _VerifyCertificate(cert_filename)
2112 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2114 feedback_fn("* Verifying hypervisor parameters")
2116 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2117 self.all_inst_info.values()))
2119 feedback_fn("* Verifying all nodes belong to an existing group")
2121 # We do this verification here because, should this bogus circumstance
2122 # occur, it would never be caught by VerifyGroup, which only acts on
2123 # nodes/instances reachable from existing node groups.
2125 dangling_nodes = set(node.name for node in self.all_node_info.values()
2126 if node.group not in self.all_group_info)
2128 dangling_instances = {}
2129 no_node_instances = []
2131 for inst in self.all_inst_info.values():
2132 if inst.primary_node in dangling_nodes:
2133 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2134 elif inst.primary_node not in self.all_node_info:
2135 no_node_instances.append(inst.name)
2140 utils.CommaJoin(dangling_instances.get(node.name,
2142 for node in dangling_nodes]
2144 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2146 "the following nodes (and their instances) belong to a non"
2147 " existing group: %s", utils.CommaJoin(pretty_dangling))
2149 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2151 "the following instances have a non-existing primary-node:"
2152 " %s", utils.CommaJoin(no_node_instances))
2157 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2158 """Verifies the status of a node group.
2161 HPATH = "cluster-verify"
2162 HTYPE = constants.HTYPE_CLUSTER
2165 _HOOKS_INDENT_RE = re.compile("^", re.M)
2167 class NodeImage(object):
2168 """A class representing the logical and physical status of a node.
2171 @ivar name: the node name to which this object refers
2172 @ivar volumes: a structure as returned from
2173 L{ganeti.backend.GetVolumeList} (runtime)
2174 @ivar instances: a list of running instances (runtime)
2175 @ivar pinst: list of configured primary instances (config)
2176 @ivar sinst: list of configured secondary instances (config)
2177 @ivar sbp: dictionary of {primary-node: list of instances} for all
2178 instances for which this node is secondary (config)
2179 @ivar mfree: free memory, as reported by hypervisor (runtime)
2180 @ivar dfree: free disk, as reported by the node (runtime)
2181 @ivar offline: the offline status (config)
2182 @type rpc_fail: boolean
2183 @ivar rpc_fail: whether the RPC verify call failed (overall,
2184 not whether the individual keys were correct) (runtime)
2185 @type lvm_fail: boolean
2186 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2187 @type hyp_fail: boolean
2188 @ivar hyp_fail: whether the RPC call didn't return the instance list
2189 @type ghost: boolean
2190 @ivar ghost: whether this is a known node or not (config)
2191 @type os_fail: boolean
2192 @ivar os_fail: whether the RPC call didn't return valid OS data
2194 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2195 @type vm_capable: boolean
2196 @ivar vm_capable: whether the node can host instances
2199 def __init__(self, offline=False, name=None, vm_capable=True):
2208 self.offline = offline
2209 self.vm_capable = vm_capable
2210 self.rpc_fail = False
2211 self.lvm_fail = False
2212 self.hyp_fail = False
2214 self.os_fail = False
2217 def ExpandNames(self):
2218 # This raises errors.OpPrereqError on its own:
2219 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2221 # Get instances in node group; this is unsafe and needs verification later
2223 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2225 self.needed_locks = {
2226 locking.LEVEL_INSTANCE: inst_names,
2227 locking.LEVEL_NODEGROUP: [self.group_uuid],
2228 locking.LEVEL_NODE: [],
2231 self.share_locks = _ShareAll()
2233 def DeclareLocks(self, level):
2234 if level == locking.LEVEL_NODE:
2235 # Get members of node group; this is unsafe and needs verification later
2236 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2238 all_inst_info = self.cfg.GetAllInstancesInfo()
2240 # In Exec(), we warn about mirrored instances that have primary and
2241 # secondary living in separate node groups. To fully verify that
2242 # volumes for these instances are healthy, we will need to do an
2243 # extra call to their secondaries. We ensure here those nodes will
2245 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2246 # Important: access only the instances whose lock is owned
2247 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2248 nodes.update(all_inst_info[inst].secondary_nodes)
2250 self.needed_locks[locking.LEVEL_NODE] = nodes
2252 def CheckPrereq(self):
2253 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2254 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2256 group_nodes = set(self.group_info.members)
2258 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2261 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2263 unlocked_instances = \
2264 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2267 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2268 utils.CommaJoin(unlocked_nodes),
2271 if unlocked_instances:
2272 raise errors.OpPrereqError("Missing lock for instances: %s" %
2273 utils.CommaJoin(unlocked_instances),
2276 self.all_node_info = self.cfg.GetAllNodesInfo()
2277 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2279 self.my_node_names = utils.NiceSort(group_nodes)
2280 self.my_inst_names = utils.NiceSort(group_instances)
2282 self.my_node_info = dict((name, self.all_node_info[name])
2283 for name in self.my_node_names)
2285 self.my_inst_info = dict((name, self.all_inst_info[name])
2286 for name in self.my_inst_names)
2288 # We detect here the nodes that will need the extra RPC calls for verifying
2289 # split LV volumes; they should be locked.
2290 extra_lv_nodes = set()
2292 for inst in self.my_inst_info.values():
2293 if inst.disk_template in constants.DTS_INT_MIRROR:
2294 for nname in inst.all_nodes:
2295 if self.all_node_info[nname].group != self.group_uuid:
2296 extra_lv_nodes.add(nname)
2298 unlocked_lv_nodes = \
2299 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2301 if unlocked_lv_nodes:
2302 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2303 utils.CommaJoin(unlocked_lv_nodes),
2305 self.extra_lv_nodes = list(extra_lv_nodes)
2307 def _VerifyNode(self, ninfo, nresult):
2308 """Perform some basic validation on data returned from a node.
2310 - check the result data structure is well formed and has all the
2312 - check ganeti version
2314 @type ninfo: L{objects.Node}
2315 @param ninfo: the node to check
2316 @param nresult: the results from the node
2318 @return: whether overall this call was successful (and we can expect
2319 reasonable values in the response)
2323 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2325 # main result, nresult should be a non-empty dict
2326 test = not nresult or not isinstance(nresult, dict)
2327 _ErrorIf(test, constants.CV_ENODERPC, node,
2328 "unable to verify node: no data returned")
2332 # compares ganeti version
2333 local_version = constants.PROTOCOL_VERSION
2334 remote_version = nresult.get("version", None)
2335 test = not (remote_version and
2336 isinstance(remote_version, (list, tuple)) and
2337 len(remote_version) == 2)
2338 _ErrorIf(test, constants.CV_ENODERPC, node,
2339 "connection to node returned invalid data")
2343 test = local_version != remote_version[0]
2344 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2345 "incompatible protocol versions: master %s,"
2346 " node %s", local_version, remote_version[0])
2350 # node seems compatible, we can actually try to look into its results
2352 # full package version
2353 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2354 constants.CV_ENODEVERSION, node,
2355 "software version mismatch: master %s, node %s",
2356 constants.RELEASE_VERSION, remote_version[1],
2357 code=self.ETYPE_WARNING)
2359 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2360 if ninfo.vm_capable and isinstance(hyp_result, dict):
2361 for hv_name, hv_result in hyp_result.iteritems():
2362 test = hv_result is not None
2363 _ErrorIf(test, constants.CV_ENODEHV, node,
2364 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2366 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2367 if ninfo.vm_capable and isinstance(hvp_result, list):
2368 for item, hv_name, hv_result in hvp_result:
2369 _ErrorIf(True, constants.CV_ENODEHV, node,
2370 "hypervisor %s parameter verify failure (source %s): %s",
2371 hv_name, item, hv_result)
2373 test = nresult.get(constants.NV_NODESETUP,
2374 ["Missing NODESETUP results"])
2375 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2380 def _VerifyNodeTime(self, ninfo, nresult,
2381 nvinfo_starttime, nvinfo_endtime):
2382 """Check the node time.
2384 @type ninfo: L{objects.Node}
2385 @param ninfo: the node to check
2386 @param nresult: the remote results for the node
2387 @param nvinfo_starttime: the start time of the RPC call
2388 @param nvinfo_endtime: the end time of the RPC call
2392 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2394 ntime = nresult.get(constants.NV_TIME, None)
2396 ntime_merged = utils.MergeTime(ntime)
2397 except (ValueError, TypeError):
2398 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2401 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2402 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2403 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2404 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2408 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2409 "Node time diverges by at least %s from master node time",
2412 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2413 """Check the node LVM results.
2415 @type ninfo: L{objects.Node}
2416 @param ninfo: the node to check
2417 @param nresult: the remote results for the node
2418 @param vg_name: the configured VG name
2425 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2427 # checks vg existence and size > 20G
2428 vglist = nresult.get(constants.NV_VGLIST, None)
2430 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2432 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2433 constants.MIN_VG_SIZE)
2434 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2437 pvlist = nresult.get(constants.NV_PVLIST, None)
2438 test = pvlist is None
2439 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2441 # check that ':' is not present in PV names, since it's a
2442 # special character for lvcreate (denotes the range of PEs to
2444 for _, pvname, owner_vg in pvlist:
2445 test = ":" in pvname
2446 _ErrorIf(test, constants.CV_ENODELVM, node,
2447 "Invalid character ':' in PV '%s' of VG '%s'",
2450 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2451 """Check the node bridges.
2453 @type ninfo: L{objects.Node}
2454 @param ninfo: the node to check
2455 @param nresult: the remote results for the node
2456 @param bridges: the expected list of bridges
2463 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2465 missing = nresult.get(constants.NV_BRIDGES, None)
2466 test = not isinstance(missing, list)
2467 _ErrorIf(test, constants.CV_ENODENET, node,
2468 "did not return valid bridge information")
2470 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2471 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2473 def _VerifyNodeUserScripts(self, ninfo, nresult):
2474 """Check the results of user script presence and executability on the node
2476 @type ninfo: L{objects.Node}
2477 @param ninfo: the node to check
2478 @param nresult: the remote results for the node
2483 test = not constants.NV_USERSCRIPTS in nresult
2484 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2485 "did not return user scripts information")
2487 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2489 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2490 "user scripts not present or not executable: %s" %
2491 utils.CommaJoin(sorted(broken_scripts)))
2493 def _VerifyNodeNetwork(self, ninfo, nresult):
2494 """Check the node network connectivity results.
2496 @type ninfo: L{objects.Node}
2497 @param ninfo: the node to check
2498 @param nresult: the remote results for the node
2502 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2504 test = constants.NV_NODELIST not in nresult
2505 _ErrorIf(test, constants.CV_ENODESSH, node,
2506 "node hasn't returned node ssh connectivity data")
2508 if nresult[constants.NV_NODELIST]:
2509 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2510 _ErrorIf(True, constants.CV_ENODESSH, node,
2511 "ssh communication with node '%s': %s", a_node, a_msg)
2513 test = constants.NV_NODENETTEST not in nresult
2514 _ErrorIf(test, constants.CV_ENODENET, node,
2515 "node hasn't returned node tcp connectivity data")
2517 if nresult[constants.NV_NODENETTEST]:
2518 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2520 _ErrorIf(True, constants.CV_ENODENET, node,
2521 "tcp communication with node '%s': %s",
2522 anode, nresult[constants.NV_NODENETTEST][anode])
2524 test = constants.NV_MASTERIP not in nresult
2525 _ErrorIf(test, constants.CV_ENODENET, node,
2526 "node hasn't returned node master IP reachability data")
2528 if not nresult[constants.NV_MASTERIP]:
2529 if node == self.master_node:
2530 msg = "the master node cannot reach the master IP (not configured?)"
2532 msg = "cannot reach the master IP"
2533 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2535 def _VerifyInstance(self, instance, instanceconfig, node_image,
2537 """Verify an instance.
2539 This function checks to see if the required block devices are
2540 available on the instance's node.
2543 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2544 node_current = instanceconfig.primary_node
2546 node_vol_should = {}
2547 instanceconfig.MapLVsByNode(node_vol_should)
2549 cluster = self.cfg.GetClusterInfo()
2550 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2552 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2553 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2555 for node in node_vol_should:
2556 n_img = node_image[node]
2557 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2558 # ignore missing volumes on offline or broken nodes
2560 for volume in node_vol_should[node]:
2561 test = volume not in n_img.volumes
2562 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2563 "volume %s missing on node %s", volume, node)
2565 if instanceconfig.admin_state == constants.ADMINST_UP:
2566 pri_img = node_image[node_current]
2567 test = instance not in pri_img.instances and not pri_img.offline
2568 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2569 "instance not running on its primary node %s",
2572 diskdata = [(nname, success, status, idx)
2573 for (nname, disks) in diskstatus.items()
2574 for idx, (success, status) in enumerate(disks)]
2576 for nname, success, bdev_status, idx in diskdata:
2577 # the 'ghost node' construction in Exec() ensures that we have a
2579 snode = node_image[nname]
2580 bad_snode = snode.ghost or snode.offline
2581 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2582 not success and not bad_snode,
2583 constants.CV_EINSTANCEFAULTYDISK, instance,
2584 "couldn't retrieve status for disk/%s on %s: %s",
2585 idx, nname, bdev_status)
2586 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2587 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2588 constants.CV_EINSTANCEFAULTYDISK, instance,
2589 "disk/%s on %s is faulty", idx, nname)
2591 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2592 """Verify if there are any unknown volumes in the cluster.
2594 The .os, .swap and backup volumes are ignored. All other volumes are
2595 reported as unknown.
2597 @type reserved: L{ganeti.utils.FieldSet}
2598 @param reserved: a FieldSet of reserved volume names
2601 for node, n_img in node_image.items():
2602 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2603 self.all_node_info[node].group != self.group_uuid):
2604 # skip non-healthy nodes
2606 for volume in n_img.volumes:
2607 test = ((node not in node_vol_should or
2608 volume not in node_vol_should[node]) and
2609 not reserved.Matches(volume))
2610 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2611 "volume %s is unknown", volume)
2613 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2614 """Verify N+1 Memory Resilience.
2616 Check that if one single node dies we can still start all the
2617 instances it was primary for.
2620 cluster_info = self.cfg.GetClusterInfo()
2621 for node, n_img in node_image.items():
2622 # This code checks that every node which is now listed as
2623 # secondary has enough memory to host all instances it is
2624 # supposed to, should a single other node in the cluster fail.
2625 # FIXME: not ready for failover to an arbitrary node
2626 # FIXME: does not support file-backed instances
2627 # WARNING: we currently take into account down instances as well
2628 # as up ones, considering that even if they're down someone
2629 # might want to start them even in the event of a node failure.
2630 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2631 # we're skipping nodes marked offline and nodes in other groups from
2632 # the N+1 warning, since most likely we don't have good memory
2633 # information from them; we already list instances living on such
2634 # nodes, and that's enough warning
2636 #TODO(dynmem): also consider ballooning out other instances
2637 for prinode, instances in n_img.sbp.items():
2639 for instance in instances:
2640 bep = cluster_info.FillBE(instance_cfg[instance])
2641 if bep[constants.BE_AUTO_BALANCE]:
2642 needed_mem += bep[constants.BE_MINMEM]
2643 test = n_img.mfree < needed_mem
2644 self._ErrorIf(test, constants.CV_ENODEN1, node,
2645 "not enough memory to accommodate instance failovers"
2646 " should node %s fail (%dMiB needed, %dMiB available)",
2647 prinode, needed_mem, n_img.mfree)
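    # Worked example for the check above (added; hypothetical numbers): if
    # this node is secondary for two auto-balanced instances with primary
    # "prinode" and BE_MINMEM values of 2048 and 4096 MiB, needed_mem is
    # 6144 MiB and an N+1 error is reported when n_img.mfree is below that.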
2650 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2651 (files_all, files_opt, files_mc, files_vm)):
2652 """Verifies file checksums collected from all nodes.
2654 @param errorif: Callback for reporting errors
2655 @param nodeinfo: List of L{objects.Node} objects
2656 @param master_node: Name of master node
2657 @param all_nvinfo: RPC results
2660 # Define functions determining which nodes to consider for a file
2663 (files_mc, lambda node: (node.master_candidate or
2664 node.name == master_node)),
2665 (files_vm, lambda node: node.vm_capable),
2668 # Build mapping from filename to list of nodes which should have the file
2670 for (files, fn) in files2nodefn:
2672 filenodes = nodeinfo
2674 filenodes = filter(fn, nodeinfo)
2675 nodefiles.update((filename,
2676 frozenset(map(operator.attrgetter("name"), filenodes)))
2677 for filename in files)
2679 assert set(nodefiles) == (files_all | files_mc | files_vm)
2681 fileinfo = dict((filename, {}) for filename in nodefiles)
2682 ignore_nodes = set()
2684 for node in nodeinfo:
2686 ignore_nodes.add(node.name)
2689 nresult = all_nvinfo[node.name]
2691 if nresult.fail_msg or not nresult.payload:
2694 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2695 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2696 for (key, value) in fingerprints.items())
2699 test = not (node_files and isinstance(node_files, dict))
2700 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2701 "Node did not return file checksum data")
2703 ignore_nodes.add(node.name)
2706 # Build per-checksum mapping from filename to nodes having it
2707 for (filename, checksum) in node_files.items():
2708 assert filename in nodefiles
2709 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2711 for (filename, checksums) in fileinfo.items():
2712 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2714 # Nodes having the file
2715 with_file = frozenset(node_name
2716 for nodes in fileinfo[filename].values()
2717 for node_name in nodes) - ignore_nodes
2719 expected_nodes = nodefiles[filename] - ignore_nodes
2721 # Nodes missing file
2722 missing_file = expected_nodes - with_file
2724 if filename in files_opt:
2726 errorif(missing_file and missing_file != expected_nodes,
2727 constants.CV_ECLUSTERFILECHECK, None,
2728 "File %s is optional, but it must exist on all or no"
2729 " nodes (not found on %s)",
2730 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2732 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2733 "File %s is missing from node(s) %s", filename,
2734 utils.CommaJoin(utils.NiceSort(missing_file)))
2736 # Warn if a node has a file it shouldn't
2737 unexpected = with_file - expected_nodes
2739 constants.CV_ECLUSTERFILECHECK, None,
2740 "File %s should not exist on node(s) %s",
2741 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2743 # See if there are multiple versions of the file
2744 test = len(checksums) > 1
2746 variants = ["variant %s on %s" %
2747 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2748 for (idx, (checksum, nodes)) in
2749 enumerate(sorted(checksums.items()))]
2753 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2754 "File %s found with %s different checksums (%s)",
2755 filename, len(checksums), "; ".join(variants))
2757 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2759 """Verifies the node DRBD status.
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nresult: the remote results for the node
2764 @param instanceinfo: the dict of instances
2765 @param drbd_helper: the configured DRBD usermode helper
2766 @param drbd_map: the DRBD map as returned by
2767 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2771 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2774 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2775 test = (helper_result is None)
2776 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2777 "no drbd usermode helper returned")
2779 status, payload = helper_result
2781 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2782 "drbd usermode helper check unsuccessful: %s", payload)
2783 test = status and (payload != drbd_helper)
2784 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2785 "wrong drbd usermode helper: %s", payload)
2787 # compute the DRBD minors
2789 for minor, instance in drbd_map[node].items():
2790 test = instance not in instanceinfo
2791 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2792 "ghost instance '%s' in temporary DRBD map", instance)
2793 # ghost instance should not be running, but otherwise we
2794 # don't give double warnings (both ghost instance and
2795 # unallocated minor in use)
2797 node_drbd[minor] = (instance, False)
2799 instance = instanceinfo[instance]
2800 node_drbd[minor] = (instance.name,
2801 instance.admin_state == constants.ADMINST_UP)
2803 # and now check them
2804 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2805 test = not isinstance(used_minors, (tuple, list))
2806 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2807 "cannot parse drbd status file: %s", str(used_minors))
2809 # we cannot check drbd status
2812 for minor, (iname, must_exist) in node_drbd.items():
2813 test = minor not in used_minors and must_exist
2814 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2815 "drbd minor %d of instance %s is not active", minor, iname)
2816 for minor in used_minors:
2817 test = minor not in node_drbd
2818 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2819 "unallocated drbd minor %d is in use", minor)
2821 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2822 """Builds the node OS structures.
2824 @type ninfo: L{objects.Node}
2825 @param ninfo: the node to check
2826 @param nresult: the remote results for the node
2827 @param nimg: the node image object
2831 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2833 remote_os = nresult.get(constants.NV_OSLIST, None)
2834 test = (not isinstance(remote_os, list) or
2835 not compat.all(isinstance(v, list) and len(v) == 7
2836 for v in remote_os))
2838 _ErrorIf(test, constants.CV_ENODEOS, node,
2839 "node hasn't returned valid OS data")
2848 for (name, os_path, status, diagnose,
2849 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2851 if name not in os_dict:
2854 # parameters is a list of lists instead of list of tuples due to
2855 # JSON lacking a real tuple type, fix it:
2856 parameters = [tuple(v) for v in parameters]
2857 os_dict[name].append((os_path, status, diagnose,
2858 set(variants), set(parameters), set(api_ver)))
2860 nimg.oslist = os_dict
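    # Illustrative shape (added comment; hypothetical OS name): nimg.oslist
    # now maps each OS name to a list of
    # (path, status, diagnose, variants, parameters, api_versions) tuples,
    # e.g. {"debootstrap": [("/srv/ganeti/os", True, "", set(["default"]),
    # set(), set([20]))]}, one tuple per path where the OS was found.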
2862 def _VerifyNodeOS(self, ninfo, nimg, base):
2863 """Verifies the node OS list.
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nimg: the node image object
2868 @param base: the 'template' node we match against (e.g. from the master)
2872 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2874 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2876 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2877 for os_name, os_data in nimg.oslist.items():
2878 assert os_data, "Empty OS status for OS %s?!" % os_name
2879 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2880 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2881 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2882 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2883 "OS '%s' has multiple entries (first one shadows the rest): %s",
2884 os_name, utils.CommaJoin([v[0] for v in os_data]))
2885 # comparisons with the 'base' image
2886 test = os_name not in base.oslist
2887 _ErrorIf(test, constants.CV_ENODEOS, node,
2888 "Extra OS %s not present on reference node (%s)",
2892 assert base.oslist[os_name], "Base node has empty OS status?"
2893 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2895 # base OS is invalid, skipping
2897 for kind, a, b in [("API version", f_api, b_api),
2898 ("variants list", f_var, b_var),
2899 ("parameters", beautify_params(f_param),
2900 beautify_params(b_param))]:
2901 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2902 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2903 kind, os_name, base.name,
2904 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2906 # check any missing OSes
2907 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2908 _ErrorIf(missing, constants.CV_ENODEOS, node,
2909 "OSes present on reference node %s but missing on this node: %s",
2910 base.name, utils.CommaJoin(missing))
2912 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2913 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2915 @type ninfo: L{objects.Node}
2916 @param ninfo: the node to check
2917 @param nresult: the remote results for the node
2918 @type is_master: bool
2919 @param is_master: Whether node is the master node
2925 (constants.ENABLE_FILE_STORAGE or
2926 constants.ENABLE_SHARED_FILE_STORAGE)):
2928 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2930 # This should never happen
2931 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2932 "Node did not return forbidden file storage paths")
2934 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2935 "Found forbidden file storage paths: %s",
2936 utils.CommaJoin(fspaths))
2938 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2939 constants.CV_ENODEFILESTORAGEPATHS, node,
2940 "Node should not have returned forbidden file storage"
2943 def _VerifyOob(self, ninfo, nresult):
2944 """Verifies out of band functionality of a node.
2946 @type ninfo: L{objects.Node}
2947 @param ninfo: the node to check
2948 @param nresult: the remote results for the node
2952 # We just have to verify the paths on master and/or master candidates
2953 # as the oob helper is invoked on the master
2954 if ((ninfo.master_candidate or ninfo.master_capable) and
2955 constants.NV_OOB_PATHS in nresult):
2956 for path_result in nresult[constants.NV_OOB_PATHS]:
2957 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2959 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2960 """Verifies and updates the node volume data.
2962 This function will update a L{NodeImage}'s internal structures
2963 with data from the remote call.
2965 @type ninfo: L{objects.Node}
2966 @param ninfo: the node to check
2967 @param nresult: the remote results for the node
2968 @param nimg: the node image object
2969 @param vg_name: the configured VG name
2973 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2975 nimg.lvm_fail = True
2976 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2979 elif isinstance(lvdata, basestring):
2980 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2981 utils.SafeEncode(lvdata))
2982 elif not isinstance(lvdata, dict):
2983 _ErrorIf(True, constants.CV_ENODELVM, node,
2984 "rpc call to node failed (lvlist)")
2986 nimg.volumes = lvdata
2987 nimg.lvm_fail = False
2989 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2990 """Verifies and updates the node instance list.
2992 If the listing was successful, then updates this node's instance
2993 list. Otherwise, it marks the RPC call as failed for the instance
2996 @type ninfo: L{objects.Node}
2997 @param ninfo: the node to check
2998 @param nresult: the remote results for the node
2999 @param nimg: the node image object
3002 idata = nresult.get(constants.NV_INSTANCELIST, None)
3003 test = not isinstance(idata, list)
3004 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3005 "rpc call to node failed (instancelist): %s",
3006 utils.SafeEncode(str(idata)))
3008 nimg.hyp_fail = True
3010 nimg.instances = idata
3012 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3013 """Verifies and computes a node information map
3015 @type ninfo: L{objects.Node}
3016 @param ninfo: the node to check
3017 @param nresult: the remote results for the node
3018 @param nimg: the node image object
3019 @param vg_name: the configured VG name
3023 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3025 # try to read free memory (from the hypervisor)
3026 hv_info = nresult.get(constants.NV_HVINFO, None)
3027 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3028 _ErrorIf(test, constants.CV_ENODEHV, node,
3029 "rpc call to node failed (hvinfo)")
3032 nimg.mfree = int(hv_info["memory_free"])
3033 except (ValueError, TypeError):
3034 _ErrorIf(True, constants.CV_ENODERPC, node,
3035 "node returned invalid nodeinfo, check hypervisor")
3037 # FIXME: devise a free space model for file based instances as well
3038 if vg_name is not None:
3039 test = (constants.NV_VGLIST not in nresult or
3040 vg_name not in nresult[constants.NV_VGLIST])
3041 _ErrorIf(test, constants.CV_ENODELVM, node,
3042 "node didn't return data for the volume group '%s'"
3043 " - it is either missing or broken", vg_name)
3046 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3047 except (ValueError, TypeError):
3048 _ErrorIf(True, constants.CV_ENODERPC, node,
3049 "node returned invalid LVM info, check LVM status")
3051 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3052 """Gets per-disk status information for all instances.
3054 @type nodelist: list of strings
3055 @param nodelist: Node names
3056 @type node_image: dict of (name, L{NodeImage})
3057 @param node_image: Node image objects
3058 @type instanceinfo: dict of (name, L{objects.Instance})
3059 @param instanceinfo: Instance objects
3060 @rtype: {instance: {node: [(success, payload)]}}
3061 @return: a dictionary of per-instance dictionaries with nodes as
3062 keys and disk information as values; the disk information is a
3063 list of tuples (success, payload)
3066 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3069 node_disks_devonly = {}
3070 diskless_instances = set()
3071 diskless = constants.DT_DISKLESS
3073 for nname in nodelist:
3074 node_instances = list(itertools.chain(node_image[nname].pinst,
3075 node_image[nname].sinst))
3076 diskless_instances.update(inst for inst in node_instances
3077 if instanceinfo[inst].disk_template == diskless)
3078 disks = [(inst, disk)
3079 for inst in node_instances
3080 for disk in instanceinfo[inst].disks]
3083 # No need to collect data
3086 node_disks[nname] = disks
3088 # _AnnotateDiskParams already makes copies of the disks
3090 for (inst, dev) in disks:
3091 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3092 self.cfg.SetDiskID(anno_disk, nname)
3093 devonly.append(anno_disk)
3095 node_disks_devonly[nname] = devonly
3097 assert len(node_disks) == len(node_disks_devonly)
3099 # Collect data from all nodes with disks
3100 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3103 assert len(result) == len(node_disks)
3107 for (nname, nres) in result.items():
3108 disks = node_disks[nname]
3111 # No data from this node
3112 data = len(disks) * [(False, "node offline")]
3115 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3116 "while getting disk information: %s", msg)
3118 # No data from this node
3119 data = len(disks) * [(False, msg)]
3122 for idx, i in enumerate(nres.payload):
3123 if isinstance(i, (tuple, list)) and len(i) == 2:
3126 logging.warning("Invalid result from node %s, entry %d: %s",
3128 data.append((False, "Invalid result from the remote node"))
3130 for ((inst, _), status) in zip(disks, data):
3131 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3133 # Add empty entries for diskless instances.
3134 for inst in diskless_instances:
3135 assert inst not in instdisk
3138 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3139 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3140 compat.all(isinstance(s, (tuple, list)) and
3141 len(s) == 2 for s in statuses)
3142 for inst, nnames in instdisk.items()
3143 for nname, statuses in nnames.items())
3144 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
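    # Illustrative shape (added comment; hypothetical names): instdisk is a
    # nested mapping instance -> node -> list of (success, payload) pairs,
    # one pair per disk, e.g.
    #   {"inst1": {"nodeA": [(True, status0), (False, "node offline")]}}
    # Diskless instances keep an empty inner mapping.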
3149 def _SshNodeSelector(group_uuid, all_nodes):
3150 """Create endless iterators for all potential SSH check hosts.
3153 nodes = [node for node in all_nodes
3154 if (node.group != group_uuid and
3156 keyfunc = operator.attrgetter("group")
3158 return map(itertools.cycle,
3159 [sorted(map(operator.attrgetter("name"), names))
3160 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3164 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3165 """Choose which nodes should talk to which other nodes.
3167 We will make nodes contact all nodes in their group, and one node from
3170 @warning: This algorithm has a known issue if one node group is much
3171 smaller than others (e.g. just one node). In such a case all other
3172 nodes will talk to the single node.
3175 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3176 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3178 return (online_nodes,
3179 dict((name, sorted([i.next() for i in sel]))
3180 for name in online_nodes))
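    # Illustrative result (added comment): the second element maps every
    # online node name in this group to a sorted list containing one node
    # from each other node group; the per-group cycles in _SshNodeSelector
    # spread those picks across the other groups' members.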
3182 def BuildHooksEnv(self):
3185 Cluster-Verify hooks are run only in the post phase; if they fail, their
3186 output is logged in the verify output and the verification fails.
3190 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3193 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3194 for node in self.my_node_info.values())
3198 def BuildHooksNodes(self):
3199 """Build hooks nodes.
3202 return ([], self.my_node_names)
3204 def Exec(self, feedback_fn):
3205 """Verify integrity of the node group, performing various tests on nodes.
3208 # This method has too many local variables. pylint: disable=R0914
3209 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3211 if not self.my_node_names:
3213 feedback_fn("* Empty node group, skipping verification")
3217 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3218 verbose = self.op.verbose
3219 self._feedback_fn = feedback_fn
3221 vg_name = self.cfg.GetVGName()
3222 drbd_helper = self.cfg.GetDRBDHelper()
3223 cluster = self.cfg.GetClusterInfo()
3224 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3225 hypervisors = cluster.enabled_hypervisors
3226 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3228 i_non_redundant = [] # Non redundant instances
3229 i_non_a_balanced = [] # Non auto-balanced instances
3230 i_offline = 0 # Count of offline instances
3231 n_offline = 0 # Count of offline nodes
3232 n_drained = 0 # Count of nodes being drained
3233 node_vol_should = {}
3235 # FIXME: verify OS list
3238 filemap = _ComputeAncillaryFiles(cluster, False)
3240 # do local checksums
3241 master_node = self.master_node = self.cfg.GetMasterNode()
3242 master_ip = self.cfg.GetMasterIP()
3244 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3247 if self.cfg.GetUseExternalMipScript():
3248 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3250 node_verify_param = {
3251 constants.NV_FILELIST:
3252 map(vcluster.MakeVirtualPath,
3253 utils.UniqueSequence(filename
3254 for files in filemap
3255 for filename in files)),
3256 constants.NV_NODELIST:
3257 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3258 self.all_node_info.values()),
3259 constants.NV_HYPERVISOR: hypervisors,
3260 constants.NV_HVPARAMS:
3261 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3262 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3263 for node in node_data_list
3264 if not node.offline],
3265 constants.NV_INSTANCELIST: hypervisors,
3266 constants.NV_VERSION: None,
3267 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3268 constants.NV_NODESETUP: None,
3269 constants.NV_TIME: None,
3270 constants.NV_MASTERIP: (master_node, master_ip),
3271 constants.NV_OSLIST: None,
3272 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3273 constants.NV_USERSCRIPTS: user_scripts,
3276 if vg_name is not None:
3277 node_verify_param[constants.NV_VGLIST] = None
3278 node_verify_param[constants.NV_LVLIST] = vg_name
3279 node_verify_param[constants.NV_PVLIST] = [vg_name]
3282 node_verify_param[constants.NV_DRBDLIST] = None
3283 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3285 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3286 # Load file storage paths only from master node
3287 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3290 # FIXME: this needs to be changed per node-group, not cluster-wide
3292 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3293 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3294 bridges.add(default_nicpp[constants.NIC_LINK])
3295 for instance in self.my_inst_info.values():
3296 for nic in instance.nics:
3297 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3298 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3299 bridges.add(full_nic[constants.NIC_LINK])
3302 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3304 # Build our expected cluster state
3305 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3307 vm_capable=node.vm_capable))
3308 for node in node_data_list)
3312 for node in self.all_node_info.values():
3313 path = _SupportsOob(self.cfg, node)
3314 if path and path not in oob_paths:
3315 oob_paths.append(path)
3318 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3320 for instance in self.my_inst_names:
3321 inst_config = self.my_inst_info[instance]
3322 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3325 for nname in inst_config.all_nodes:
3326 if nname not in node_image:
3327 gnode = self.NodeImage(name=nname)
3328 gnode.ghost = (nname not in self.all_node_info)
3329 node_image[nname] = gnode
3331 inst_config.MapLVsByNode(node_vol_should)
3333 pnode = inst_config.primary_node
3334 node_image[pnode].pinst.append(instance)
3336 for snode in inst_config.secondary_nodes:
3337 nimg = node_image[snode]
3338 nimg.sinst.append(instance)
3339 if pnode not in nimg.sbp:
3340 nimg.sbp[pnode] = []
3341 nimg.sbp[pnode].append(instance)
3343 # At this point, we have the in-memory data structures complete,
3344 # except for the runtime information, which we'll gather next
3346 # Due to the way our RPC system works, exact response times cannot be
3347 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3348 # time before and after executing the request, we can at least have a time
3350 nvinfo_starttime = time.time()
3351 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3353 self.cfg.GetClusterName())
3354 nvinfo_endtime = time.time()
3356 if self.extra_lv_nodes and vg_name is not None:
3358 self.rpc.call_node_verify(self.extra_lv_nodes,
3359 {constants.NV_LVLIST: vg_name},
3360 self.cfg.GetClusterName())
3362 extra_lv_nvinfo = {}
3364 all_drbd_map = self.cfg.ComputeDRBDMap()
3366 feedback_fn("* Gathering disk information (%s nodes)" %
3367 len(self.my_node_names))
3368 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3371 feedback_fn("* Verifying configuration file consistency")
3373 # If not all nodes are being checked, we need to make sure the master node
3374 # and a non-checked vm_capable node are in the list.
3375 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3377 vf_nvinfo = all_nvinfo.copy()
3378 vf_node_info = list(self.my_node_info.values())
3379 additional_nodes = []
3380 if master_node not in self.my_node_info:
3381 additional_nodes.append(master_node)
3382 vf_node_info.append(self.all_node_info[master_node])
3383 # Add the first vm_capable node we find which is not included,
3384 # excluding the master node (which we already have)
3385 for node in absent_nodes:
3386 nodeinfo = self.all_node_info[node]
3387 if (nodeinfo.vm_capable and not nodeinfo.offline and
3388 node != master_node):
3389 additional_nodes.append(node)
3390 vf_node_info.append(self.all_node_info[node])
3392 key = constants.NV_FILELIST
3393 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3394 {key: node_verify_param[key]},
3395 self.cfg.GetClusterName()))
3397 vf_nvinfo = all_nvinfo
3398 vf_node_info = self.my_node_info.values()
3400 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3402 feedback_fn("* Verifying node status")
3406 for node_i in node_data_list:
3408 nimg = node_image[node]
3412 feedback_fn("* Skipping offline node %s" % (node,))
3416 if node == master_node:
3418 elif node_i.master_candidate:
3419 ntype = "master candidate"
3420 elif node_i.drained:
3426 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3428 msg = all_nvinfo[node].fail_msg
3429 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3432 nimg.rpc_fail = True
3435 nresult = all_nvinfo[node].payload
3437 nimg.call_ok = self._VerifyNode(node_i, nresult)
3438 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3439 self._VerifyNodeNetwork(node_i, nresult)
3440 self._VerifyNodeUserScripts(node_i, nresult)
3441 self._VerifyOob(node_i, nresult)
3442 self._VerifyFileStoragePaths(node_i, nresult,
3443 node == master_node)
3446 self._VerifyNodeLVM(node_i, nresult, vg_name)
3447 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3450 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3451 self._UpdateNodeInstances(node_i, nresult, nimg)
3452 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3453 self._UpdateNodeOS(node_i, nresult, nimg)
3455 if not nimg.os_fail:
3456 if refos_img is None:
3458 self._VerifyNodeOS(node_i, nimg, refos_img)
3459 self._VerifyNodeBridges(node_i, nresult, bridges)
3461 # Check whether all running instances are primary for the node. (This
3462 # can no longer be done from _VerifyInstance below, since some of the
3463 # wrong instances could be from other node groups.)
3464 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3466 for inst in non_primary_inst:
3467 test = inst in self.all_inst_info
3468 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3469 "instance should not run on node %s", node_i.name)
3470 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3471 "node is running unknown instance %s", inst)
3473 for node, result in extra_lv_nvinfo.items():
3474 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3475 node_image[node], vg_name)
3477 feedback_fn("* Verifying instance status")
3478 for instance in self.my_inst_names:
3480 feedback_fn("* Verifying instance %s" % instance)
3481 inst_config = self.my_inst_info[instance]
3482 self._VerifyInstance(instance, inst_config, node_image,
3484 inst_nodes_offline = []
3486 pnode = inst_config.primary_node
3487 pnode_img = node_image[pnode]
3488 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3489 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3490 " primary node failed", instance)
3492 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3494 constants.CV_EINSTANCEBADNODE, instance,
3495 "instance is marked as running and lives on offline node %s",
3496 inst_config.primary_node)
3498 # If the instance is non-redundant we cannot survive losing its primary
3499 # node, so we are not N+1 compliant.
3500 if inst_config.disk_template not in constants.DTS_MIRRORED:
3501 i_non_redundant.append(instance)
3503 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3504 constants.CV_EINSTANCELAYOUT,
3505 instance, "instance has multiple secondary nodes: %s",
3506 utils.CommaJoin(inst_config.secondary_nodes),
3507 code=self.ETYPE_WARNING)
3509 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3510 pnode = inst_config.primary_node
3511 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3512 instance_groups = {}
3514 for node in instance_nodes:
3515 instance_groups.setdefault(self.all_node_info[node].group,
3519 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3520 # Sort so that we always list the primary node first.
3521 for group, nodes in sorted(instance_groups.items(),
3522 key=lambda (_, nodes): pnode in nodes,
3525 self._ErrorIf(len(instance_groups) > 1,
3526 constants.CV_EINSTANCESPLITGROUPS,
3527 instance, "instance has primary and secondary nodes in"
3528 " different groups: %s", utils.CommaJoin(pretty_list),
3529 code=self.ETYPE_WARNING)
3531 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3532 i_non_a_balanced.append(instance)
3534 for snode in inst_config.secondary_nodes:
3535 s_img = node_image[snode]
3536 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3537 snode, "instance %s, connection to secondary node failed",
3541 inst_nodes_offline.append(snode)
3543 # warn that the instance lives on offline nodes
3544 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3545 "instance has offline secondary node(s) %s",
3546 utils.CommaJoin(inst_nodes_offline))
3547 # ... or ghost/non-vm_capable nodes
3548 for node in inst_config.all_nodes:
3549 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3550 instance, "instance lives on ghost node %s", node)
3551 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3552 instance, "instance lives on non-vm_capable node %s", node)
3554 feedback_fn("* Verifying orphan volumes")
3555 reserved = utils.FieldSet(*cluster.reserved_lvs)
3557 # We will get spurious "unknown volume" warnings if any node of this group
3558 # is secondary for an instance whose primary is in another group. To avoid
3559 # them, we find these instances and add their volumes to node_vol_should.
3560 for inst in self.all_inst_info.values():
3561 for secondary in inst.secondary_nodes:
3562 if (secondary in self.my_node_info
3563 and inst.name not in self.my_inst_info):
3564 inst.MapLVsByNode(node_vol_should)
3567 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3569 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3570 feedback_fn("* Verifying N+1 Memory redundancy")
3571 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3573 feedback_fn("* Other Notes")
3575 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3576 % len(i_non_redundant))
3578 if i_non_a_balanced:
3579 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3580 % len(i_non_a_balanced))
3583 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3586 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3589 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3593 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3594 """Analyze the post-hooks' result
3596 This method analyses the hook result, handles it, and sends some
3597 nicely-formatted feedback back to the user.
3599 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3600 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3601 @param hooks_results: the results of the multi-node hooks rpc call
3602 @param feedback_fn: function used to send feedback back to the caller
3603 @param lu_result: previous Exec result
3604 @return: the new Exec result, based on the previous result
3608 # We only really run POST phase hooks, only for non-empty groups,
3609 # and are only interested in their results
3610 if not self.my_node_names:
3613 elif phase == constants.HOOKS_PHASE_POST:
3614 # Used to change hooks' output to proper indentation
3615 feedback_fn("* Hooks Results")
3616 assert hooks_results, "invalid result from hooks"
3618 for node_name in hooks_results:
3619 res = hooks_results[node_name]
3621 test = msg and not res.offline
3622 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3623 "Communication failure in hooks execution: %s", msg)
3624 if res.offline or msg:
3625 # No need to investigate payload if node is offline or gave
3628 for script, hkr, output in res.payload:
3629 test = hkr == constants.HKR_FAIL
3630 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3631 "Script %s failed, output:", script)
3633 output = self._HOOKS_INDENT_RE.sub(" ", output)
3634 feedback_fn("%s" % output)
3640 class LUClusterVerifyDisks(NoHooksLU):
3641 """Verifies the cluster disk status.
3646 def ExpandNames(self):
3647 self.share_locks = _ShareAll()
3648 self.needed_locks = {
3649 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3652 def Exec(self, feedback_fn):
3653 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3655 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3656 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3657 for group in group_names])
3660 class LUGroupVerifyDisks(NoHooksLU):
3661 """Verifies the status of all disks in a node group.
3666 def ExpandNames(self):
3667 # Raises errors.OpPrereqError on its own if group can't be found
3668 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3670 self.share_locks = _ShareAll()
3671 self.needed_locks = {
3672 locking.LEVEL_INSTANCE: [],
3673 locking.LEVEL_NODEGROUP: [],
3674 locking.LEVEL_NODE: [],
3677 def DeclareLocks(self, level):
3678 if level == locking.LEVEL_INSTANCE:
3679 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3681 # Lock instances optimistically, needs verification once node and group
3682 # locks have been acquired
3683 self.needed_locks[locking.LEVEL_INSTANCE] = \
3684 self.cfg.GetNodeGroupInstances(self.group_uuid)
3686 elif level == locking.LEVEL_NODEGROUP:
3687 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3689 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3690 set([self.group_uuid] +
3691 # Lock all groups used by instances optimistically; this requires
3692 # going via the node before it's locked, requiring verification
3695 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3696 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3698 elif level == locking.LEVEL_NODE:
3699 # This will only lock the nodes in the group to be verified which contain
3701 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3702 self._LockInstancesNodes()
3704 # Lock all nodes in group to be verified
3705 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3706 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3707 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3709 def CheckPrereq(self):
3710 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3711 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3712 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3714 assert self.group_uuid in owned_groups
3716 # Check if locked instances are still correct
3717 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3719 # Get instance information
3720 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3722 # Check if node groups for locked instances are still correct
3723 _CheckInstancesNodeGroups(self.cfg, self.instances,
3724 owned_groups, owned_nodes, self.group_uuid)
3726 def Exec(self, feedback_fn):
3727 """Verify integrity of cluster disks.
3729 @rtype: tuple of three items
3730 @return: a tuple of (dict of node-to-node_error, list of instances
3731 which need activate-disks, dict of instance: (node, volume) for
3732 missing volumes
3736 res_instances = set()
3739 nv_dict = _MapInstanceDisksToNodes(
3740 [inst for inst in self.instances.values()
3741 if inst.admin_state == constants.ADMINST_UP])
3744 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3745 set(self.cfg.GetVmCapableNodeList()))
3747 node_lvs = self.rpc.call_lv_list(nodes, [])
3749 for (node, node_res) in node_lvs.items():
3750 if node_res.offline:
3753 msg = node_res.fail_msg
3755 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3756 res_nodes[node] = msg
3759 for lv_name, (_, _, lv_online) in node_res.payload.items():
3760 inst = nv_dict.pop((node, lv_name), None)
3761 if not (lv_online or inst is None):
3762 res_instances.add(inst)
3764 # any leftover items in nv_dict are missing LVs, let's arrange the data
3766 for key, inst in nv_dict.iteritems():
3767 res_missing.setdefault(inst, []).append(list(key))
3769 return (res_nodes, list(res_instances), res_missing)
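# Editor's sketch of the bookkeeping used above, with plain dicts and made-up
# node/LV/instance names instead of configuration objects: nv_dict maps
# (node, lv_name) to the owning instance, LVs reported by the nodes are popped
# out, and whatever is left over becomes the "missing volumes" part of the
# result.
nv_dict = {("node1", "xenvg/disk0"): "inst1",
           ("node2", "xenvg/disk1"): "inst2"}
reported = [("node1", "xenvg/disk0")]   # LVs actually enumerated on the nodes

for key in reported:
    nv_dict.pop(key, None)

res_missing = {}
for key, inst in nv_dict.items():
    res_missing.setdefault(inst, []).append(list(key))

assert res_missing == {"inst2": [["node2", "xenvg/disk1"]]}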
3772 class LUClusterRepairDiskSizes(NoHooksLU):
3773 """Verifies the cluster disk sizes.
3778 def ExpandNames(self):
3779 if self.op.instances:
3780 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3781 self.needed_locks = {
3782 locking.LEVEL_NODE_RES: [],
3783 locking.LEVEL_INSTANCE: self.wanted_names,
3785 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3787 self.wanted_names = None
3788 self.needed_locks = {
3789 locking.LEVEL_NODE_RES: locking.ALL_SET,
3790 locking.LEVEL_INSTANCE: locking.ALL_SET,
3792 self.share_locks = {
3793 locking.LEVEL_NODE_RES: 1,
3794 locking.LEVEL_INSTANCE: 0,
3797 def DeclareLocks(self, level):
3798 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3799 self._LockInstancesNodes(primary_only=True, level=level)
3801 def CheckPrereq(self):
3802 """Check prerequisites.
3804 This only checks the optional instance list against the existing names.
3807 if self.wanted_names is None:
3808 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3810 self.wanted_instances = \
3811 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3813 def _EnsureChildSizes(self, disk):
3814 """Ensure children of the disk have the needed disk size.
3816 This is valid mainly for DRBD8 and fixes an issue where the
3817 children have smaller disk size.
3819 @param disk: an L{ganeti.objects.Disk} object
3822 if disk.dev_type == constants.LD_DRBD8:
3823 assert disk.children, "Empty children for DRBD8?"
3824 fchild = disk.children[0]
3825 mismatch = fchild.size < disk.size
3827 self.LogInfo("Child disk has size %d, parent %d, fixing",
3828 fchild.size, disk.size)
3829 fchild.size = disk.size
3831 # and we recurse on this child only, not on the metadev
3832 return self._EnsureChildSizes(fchild) or mismatch
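# Editor's sketch of the child-size fix above on a minimal stand-in class
# (hypothetical fields, not ganeti.objects.Disk): for a DRBD8 device the first
# child (the data volume) is grown to the parent's size, recursing only on
# that child and never on the metadata device.
class _FakeDisk(object):
    def __init__(self, dev_type, size, children=None):
        self.dev_type = dev_type
        self.size = size
        self.children = children or []

def _ensure_child_sizes(disk):
    if disk.dev_type != "drbd8" or not disk.children:
        return False
    fchild = disk.children[0]
    mismatch = fchild.size < disk.size
    if mismatch:
        fchild.size = disk.size
    # Recurse on the data child only; report whether anything was changed.
    return _ensure_child_sizes(fchild) or mismatch

drbd = _FakeDisk("drbd8", 1024,
                 [_FakeDisk("lvm", 1000), _FakeDisk("lvm", 128)])
assert _ensure_child_sizes(drbd) and drbd.children[0].size == 1024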
3836 def Exec(self, feedback_fn):
3837 """Verify the size of cluster disks.
3840 # TODO: check child disks too
3841 # TODO: check differences in size between primary/secondary nodes
3843 for instance in self.wanted_instances:
3844 pnode = instance.primary_node
3845 if pnode not in per_node_disks:
3846 per_node_disks[pnode] = []
3847 for idx, disk in enumerate(instance.disks):
3848 per_node_disks[pnode].append((instance, idx, disk))
3850 assert not (frozenset(per_node_disks.keys()) -
3851 self.owned_locks(locking.LEVEL_NODE_RES)), \
3852 "Not owning correct locks"
3853 assert not self.owned_locks(locking.LEVEL_NODE)
3856 for node, dskl in per_node_disks.items():
3857 newl = [v[2].Copy() for v in dskl]
3858 for dsk in newl:
3859 self.cfg.SetDiskID(dsk, node)
3860 result = self.rpc.call_blockdev_getsize(node, newl)
3862 self.LogWarning("Failure in blockdev_getsize call to node"
3863 " %s, ignoring", node)
3865 if len(result.payload) != len(dskl):
3866 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3867 " result.payload=%s", node, len(dskl), result.payload)
3868 self.LogWarning("Invalid result from node %s, ignoring node results",
3869 node)
3870 continue
3871 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3873 self.LogWarning("Disk %d of instance %s did not return size"
3874 " information, ignoring", idx, instance.name)
3876 if not isinstance(size, (int, long)):
3877 self.LogWarning("Disk %d of instance %s did not return valid"
3878 " size information, ignoring", idx, instance.name)
3881 if size != disk.size:
3882 self.LogInfo("Disk %d of instance %s has mismatched size,"
3883 " correcting: recorded %d, actual %d", idx,
3884 instance.name, disk.size, size)
3886 self.cfg.Update(instance, feedback_fn)
3887 changed.append((instance.name, idx, size))
3888 if self._EnsureChildSizes(disk):
3889 self.cfg.Update(instance, feedback_fn)
3890 changed.append((instance.name, idx, disk.size))
3894 class LUClusterRename(LogicalUnit):
3895 """Rename the cluster.
3898 HPATH = "cluster-rename"
3899 HTYPE = constants.HTYPE_CLUSTER
3901 def BuildHooksEnv(self):
3906 "OP_TARGET": self.cfg.GetClusterName(),
3907 "NEW_NAME": self.op.name,
3910 def BuildHooksNodes(self):
3911 """Build hooks nodes.
3914 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3916 def CheckPrereq(self):
3917 """Verify that the passed name is a valid one.
3920 hostname = netutils.GetHostname(name=self.op.name,
3921 family=self.cfg.GetPrimaryIPFamily())
3923 new_name = hostname.name
3924 self.ip = new_ip = hostname.ip
3925 old_name = self.cfg.GetClusterName()
3926 old_ip = self.cfg.GetMasterIP()
3927 if new_name == old_name and new_ip == old_ip:
3928 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3929 " cluster has changed",
3931 if new_ip != old_ip:
3932 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3933 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3934 " reachable on the network" %
3935 new_ip, errors.ECODE_NOTUNIQUE)
3937 self.op.name = new_name
3939 def Exec(self, feedback_fn):
3940 """Rename the cluster.
3943 clustername = self.op.name
3946 # shutdown the master IP
3947 master_params = self.cfg.GetMasterNetworkParameters()
3948 ems = self.cfg.GetUseExternalMipScript()
3949 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3951 result.Raise("Could not disable the master role")
3954 cluster = self.cfg.GetClusterInfo()
3955 cluster.cluster_name = clustername
3956 cluster.master_ip = new_ip
3957 self.cfg.Update(cluster, feedback_fn)
3959 # update the known hosts file
3960 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3961 node_list = self.cfg.GetOnlineNodeList()
3963 node_list.remove(master_params.name)
3966 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3968 master_params.ip = new_ip
3969 result = self.rpc.call_node_activate_master_ip(master_params.name,
3971 msg = result.fail_msg
3973 self.LogWarning("Could not re-enable the master role on"
3974 " the master, please restart manually: %s", msg)
3979 def _ValidateNetmask(cfg, netmask):
3980 """Checks if a netmask is valid.
3982 @type cfg: L{config.ConfigWriter}
3983 @param cfg: The cluster configuration
3985 @param netmask: the netmask to be verified
3986 @raise errors.OpPrereqError: if the validation fails
3989 ip_family = cfg.GetPrimaryIPFamily()
3991 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3992 except errors.ProgrammerError:
3993 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3994 ip_family, errors.ECODE_INVAL)
3995 if not ipcls.ValidateNetmask(netmask):
3996 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3997 (netmask), errors.ECODE_INVAL)
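# Editor's sketch of the same validation with no Ganeti imports (assumed
# semantics: the master netmask is a CIDR prefix length, checked against the
# cluster's primary IP family):
def _valid_prefix_len(netmask, ipv6=False):
    try:
        value = int(netmask)
    except (TypeError, ValueError):
        return False
    return 0 < value <= (128 if ipv6 else 32)

assert _valid_prefix_len(24)
assert not _valid_prefix_len(33)
assert _valid_prefix_len(64, ipv6=True)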
4000 class LUClusterSetParams(LogicalUnit):
4001 """Change the parameters of the cluster.
4004 HPATH = "cluster-modify"
4005 HTYPE = constants.HTYPE_CLUSTER
4008 def CheckArguments(self):
4012 if self.op.uid_pool:
4013 uidpool.CheckUidPool(self.op.uid_pool)
4015 if self.op.add_uids:
4016 uidpool.CheckUidPool(self.op.add_uids)
4018 if self.op.remove_uids:
4019 uidpool.CheckUidPool(self.op.remove_uids)
4021 if self.op.master_netmask is not None:
4022 _ValidateNetmask(self.cfg, self.op.master_netmask)
4024 if self.op.diskparams:
4025 for dt_params in self.op.diskparams.values():
4026 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4027 try:
4028 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4029 except errors.OpPrereqError, err:
4030 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4033 def ExpandNames(self):
4034 # FIXME: in the future maybe other cluster params won't require checking on
4035 # all nodes to be modified.
4036 self.needed_locks = {
4037 locking.LEVEL_NODE: locking.ALL_SET,
4038 locking.LEVEL_INSTANCE: locking.ALL_SET,
4039 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4041 self.share_locks = {
4042 locking.LEVEL_NODE: 1,
4043 locking.LEVEL_INSTANCE: 1,
4044 locking.LEVEL_NODEGROUP: 1,
4047 def BuildHooksEnv(self):
4052 "OP_TARGET": self.cfg.GetClusterName(),
4053 "NEW_VG_NAME": self.op.vg_name,
4056 def BuildHooksNodes(self):
4057 """Build hooks nodes.
4060 mn = self.cfg.GetMasterNode()
4063 def CheckPrereq(self):
4064 """Check prerequisites.
4066 This checks that the given parameters do not conflict and
4067 that the given volume group is valid.
4070 if self.op.vg_name is not None and not self.op.vg_name:
4071 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4072 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4073 " instances exist", errors.ECODE_INVAL)
4075 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4076 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4077 raise errors.OpPrereqError("Cannot disable drbd helper while"
4078 " drbd-based instances exist",
4081 node_list = self.owned_locks(locking.LEVEL_NODE)
4083 # if vg_name not None, checks given volume group on all nodes
4085 vglist = self.rpc.call_vg_list(node_list)
4086 for node in node_list:
4087 msg = vglist[node].fail_msg
4089 # ignoring down node
4090 self.LogWarning("Error while gathering data on node %s"
4091 " (ignoring node): %s", node, msg)
4093 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4095 constants.MIN_VG_SIZE)
4097 raise errors.OpPrereqError("Error on node '%s': %s" %
4098 (node, vgstatus), errors.ECODE_ENVIRON)
4100 if self.op.drbd_helper:
4101 # checks given drbd helper on all nodes
4102 helpers = self.rpc.call_drbd_helper(node_list)
4103 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4105 self.LogInfo("Not checking drbd helper on offline node %s", node)
4107 msg = helpers[node].fail_msg
4109 raise errors.OpPrereqError("Error checking drbd helper on node"
4110 " '%s': %s" % (node, msg),
4111 errors.ECODE_ENVIRON)
4112 node_helper = helpers[node].payload
4113 if node_helper != self.op.drbd_helper:
4114 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4115 (node, node_helper), errors.ECODE_ENVIRON)
4117 self.cluster = cluster = self.cfg.GetClusterInfo()
4118 # validate params changes
4119 if self.op.beparams:
4120 objects.UpgradeBeParams(self.op.beparams)
4121 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4122 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4124 if self.op.ndparams:
4125 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4126 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4128 # TODO: we need a more general way to handle resetting
4129 # cluster-level parameters to default values
4130 if self.new_ndparams["oob_program"] == "":
4131 self.new_ndparams["oob_program"] = \
4132 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4134 if self.op.hv_state:
4135 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4136 self.cluster.hv_state_static)
4137 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4138 for hv, values in new_hv_state.items())
4140 if self.op.disk_state:
4141 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4142 self.cluster.disk_state_static)
4143 self.new_disk_state = \
4144 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4145 for name, values in svalues.items()))
4146 for storage, svalues in new_disk_state.items())
4149 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4152 all_instances = self.cfg.GetAllInstancesInfo().values()
4154 for group in self.cfg.GetAllNodeGroupsInfo().values():
4155 instances = frozenset([inst for inst in all_instances
4156 if compat.any(node in group.members
4157 for node in inst.all_nodes)])
4158 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4159 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4160 new = _ComputeNewInstanceViolations(ipol,
4161 new_ipolicy, instances)
4163 violations.update(new)
4166 self.LogWarning("After the ipolicy change the following instances"
4167 " violate them: %s",
4168 utils.CommaJoin(utils.NiceSort(violations)))
4170 if self.op.nicparams:
4171 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4172 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4173 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4176 # check all instances for consistency
4177 for instance in self.cfg.GetAllInstancesInfo().values():
4178 for nic_idx, nic in enumerate(instance.nics):
4179 params_copy = copy.deepcopy(nic.nicparams)
4180 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4182 # check parameter syntax
4183 try:
4184 objects.NIC.CheckParameterSyntax(params_filled)
4185 except errors.ConfigurationError, err:
4186 nic_errors.append("Instance %s, nic/%d: %s" %
4187 (instance.name, nic_idx, err))
4189 # if we're moving instances to routed, check that they have an ip
4190 target_mode = params_filled[constants.NIC_MODE]
4191 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4192 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4193 " address" % (instance.name, nic_idx))
4195 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4196 "\n".join(nic_errors), errors.ECODE_INVAL)
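# Editor's sketch of the per-NIC filling checked in the loop above (plain
# dicts, hypothetical values): the per-instance NIC overrides are layered on
# top of the proposed cluster-level NIC defaults before the syntax check, and
# a NIC that ends up in routed mode without an IP address is reported.
new_nicparams = {"mode": "routed", "link": ""}      # proposed cluster defaults
nic_overrides = {"link": "eth1"}                    # one instance's NIC params
params_filled = dict(new_nicparams, **nic_overrides)
assert params_filled == {"mode": "routed", "link": "eth1"}

nic_ip = None
needs_ip_error = params_filled["mode"] == "routed" and not nic_ip
assert needs_ip_error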
4198 # hypervisor list/parameters
4199 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4200 if self.op.hvparams:
4201 for hv_name, hv_dict in self.op.hvparams.items():
4202 if hv_name not in self.new_hvparams:
4203 self.new_hvparams[hv_name] = hv_dict
4205 self.new_hvparams[hv_name].update(hv_dict)
4207 # disk template parameters
4208 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4209 if self.op.diskparams:
4210 for dt_name, dt_params in self.op.diskparams.items():
4211 if dt_name not in self.new_diskparams:
4212 self.new_diskparams[dt_name] = dt_params
4214 self.new_diskparams[dt_name].update(dt_params)
4216 # os hypervisor parameters
4217 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4219 for os_name, hvs in self.op.os_hvp.items():
4220 if os_name not in self.new_os_hvp:
4221 self.new_os_hvp[os_name] = hvs
4223 for hv_name, hv_dict in hvs.items():
4224 if hv_name not in self.new_os_hvp[os_name]:
4225 self.new_os_hvp[os_name][hv_name] = hv_dict
4227 self.new_os_hvp[os_name][hv_name].update(hv_dict)
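# Editor's sketch of the two-level merge used for os_hvp above, on plain dicts
# with made-up OS/hypervisor names: settings for already-known OS/hypervisor
# pairs are updated in place, while new OSes or hypervisors are added whole.
new_os_hvp = {"debian": {"kvm": {"kernel_path": "/boot/vmlinuz"}}}
op_os_hvp = {"debian": {"kvm": {"acpi": True}},
             "centos": {"xen-pvm": {}}}

for os_name, hvs in op_os_hvp.items():
    if os_name not in new_os_hvp:
        new_os_hvp[os_name] = hvs
    else:
        for hv_name, hv_dict in hvs.items():
            new_os_hvp[os_name].setdefault(hv_name, {}).update(hv_dict)

assert new_os_hvp["debian"]["kvm"] == {"kernel_path": "/boot/vmlinuz",
                                       "acpi": True}
assert "centos" in new_os_hvp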
4230 self.new_osp = objects.FillDict(cluster.osparams, {})
4231 if self.op.osparams:
4232 for os_name, osp in self.op.osparams.items():
4233 if os_name not in self.new_osp:
4234 self.new_osp[os_name] = {}
4236 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4239 if not self.new_osp[os_name]:
4240 # we removed all parameters
4241 del self.new_osp[os_name]
4243 # check the parameter validity (remote check)
4244 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4245 os_name, self.new_osp[os_name])
4247 # changes to the hypervisor list
4248 if self.op.enabled_hypervisors is not None:
4249 self.hv_list = self.op.enabled_hypervisors
4250 for hv in self.hv_list:
4251 # if the hypervisor doesn't already exist in the cluster
4252 # hvparams, we initialize it to empty, and then (in both
4253 # cases) we make sure to fill the defaults, as we might not
4254 # have a complete defaults list if the hypervisor wasn't
4255 # enabled before
4256 if hv not in new_hvp:
4257 new_hvp[hv] = {}
4258 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4259 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4261 self.hv_list = cluster.enabled_hypervisors
4263 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4264 # either the enabled list has changed, or the parameters have, validate
4265 for hv_name, hv_params in self.new_hvparams.items():
4266 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4267 (self.op.enabled_hypervisors and
4268 hv_name in self.op.enabled_hypervisors)):
4269 # either this is a new hypervisor, or its parameters have changed
4270 hv_class = hypervisor.GetHypervisor(hv_name)
4271 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4272 hv_class.CheckParameterSyntax(hv_params)
4273 _CheckHVParams(self, node_list, hv_name, hv_params)
4276 # no need to check any newly-enabled hypervisors, since the
4277 # defaults have already been checked in the above code-block
4278 for os_name, os_hvp in self.new_os_hvp.items():
4279 for hv_name, hv_params in os_hvp.items():
4280 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4281 # we need to fill in the new os_hvp on top of the actual hv_p
4282 cluster_defaults = self.new_hvparams.get(hv_name, {})
4283 new_osp = objects.FillDict(cluster_defaults, hv_params)
4284 hv_class = hypervisor.GetHypervisor(hv_name)
4285 hv_class.CheckParameterSyntax(new_osp)
4286 _CheckHVParams(self, node_list, hv_name, new_osp)
4288 if self.op.default_iallocator:
4289 alloc_script = utils.FindFile(self.op.default_iallocator,
4290 constants.IALLOCATOR_SEARCH_PATH,
4292 if alloc_script is None:
4293 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4294 " specified" % self.op.default_iallocator,
4297 def Exec(self, feedback_fn):
4298 """Change the parameters of the cluster.
4301 if self.op.vg_name is not None:
4302 new_volume = self.op.vg_name
4305 if new_volume != self.cfg.GetVGName():
4306 self.cfg.SetVGName(new_volume)
4308 feedback_fn("Cluster LVM configuration already in desired"
4309 " state, not changing")
4310 if self.op.drbd_helper is not None:
4311 new_helper = self.op.drbd_helper
4314 if new_helper != self.cfg.GetDRBDHelper():
4315 self.cfg.SetDRBDHelper(new_helper)
4317 feedback_fn("Cluster DRBD helper already in desired state,"
4319 if self.op.hvparams:
4320 self.cluster.hvparams = self.new_hvparams
4322 self.cluster.os_hvp = self.new_os_hvp
4323 if self.op.enabled_hypervisors is not None:
4324 self.cluster.hvparams = self.new_hvparams
4325 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4326 if self.op.beparams:
4327 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4328 if self.op.nicparams:
4329 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4331 self.cluster.ipolicy = self.new_ipolicy
4332 if self.op.osparams:
4333 self.cluster.osparams = self.new_osp
4334 if self.op.ndparams:
4335 self.cluster.ndparams = self.new_ndparams
4336 if self.op.diskparams:
4337 self.cluster.diskparams = self.new_diskparams
4338 if self.op.hv_state:
4339 self.cluster.hv_state_static = self.new_hv_state
4340 if self.op.disk_state:
4341 self.cluster.disk_state_static = self.new_disk_state
4343 if self.op.candidate_pool_size is not None:
4344 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4345 # we need to update the pool size here, otherwise the save will fail
4346 _AdjustCandidatePool(self, [])
4348 if self.op.maintain_node_health is not None:
4349 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4350 feedback_fn("Note: CONFD was disabled at build time, node health"
4351 " maintenance is not useful (still enabling it)")
4352 self.cluster.maintain_node_health = self.op.maintain_node_health
4354 if self.op.prealloc_wipe_disks is not None:
4355 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4357 if self.op.add_uids is not None:
4358 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4360 if self.op.remove_uids is not None:
4361 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4363 if self.op.uid_pool is not None:
4364 self.cluster.uid_pool = self.op.uid_pool
4366 if self.op.default_iallocator is not None:
4367 self.cluster.default_iallocator = self.op.default_iallocator
4369 if self.op.reserved_lvs is not None:
4370 self.cluster.reserved_lvs = self.op.reserved_lvs
4372 if self.op.use_external_mip_script is not None:
4373 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4375 def helper_os(aname, mods, desc):
4377 lst = getattr(self.cluster, aname)
4378 for key, val in mods:
4379 if key == constants.DDM_ADD:
4381 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4384 elif key == constants.DDM_REMOVE:
4388 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4390 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4392 if self.op.hidden_os:
4393 helper_os("hidden_os", self.op.hidden_os, "hidden")
4395 if self.op.blacklisted_os:
4396 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4398 if self.op.master_netdev:
4399 master_params = self.cfg.GetMasterNetworkParameters()
4400 ems = self.cfg.GetUseExternalMipScript()
4401 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4402 self.cluster.master_netdev)
4403 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4405 result.Raise("Could not disable the master ip")
4406 feedback_fn("Changing master_netdev from %s to %s" %
4407 (master_params.netdev, self.op.master_netdev))
4408 self.cluster.master_netdev = self.op.master_netdev
4410 if self.op.master_netmask:
4411 master_params = self.cfg.GetMasterNetworkParameters()
4412 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4413 result = self.rpc.call_node_change_master_netmask(master_params.name,
4414 master_params.netmask,
4415 self.op.master_netmask,
4417 master_params.netdev)
4419 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4422 self.cluster.master_netmask = self.op.master_netmask
4424 self.cfg.Update(self.cluster, feedback_fn)
4426 if self.op.master_netdev:
4427 master_params = self.cfg.GetMasterNetworkParameters()
4428 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4429 self.op.master_netdev)
4430 ems = self.cfg.GetUseExternalMipScript()
4431 result = self.rpc.call_node_activate_master_ip(master_params.name,
4434 self.LogWarning("Could not re-enable the master ip on"
4435 " the master, please restart manually: %s",
4439 def _UploadHelper(lu, nodes, fname):
4440 """Helper for uploading a file and showing warnings.
4443 if os.path.exists(fname):
4444 result = lu.rpc.call_upload_file(nodes, fname)
4445 for to_node, to_result in result.items():
4446 msg = to_result.fail_msg
4448 msg = ("Copy of file %s to node %s failed: %s" %
4449 (fname, to_node, msg))
4450 lu.proc.LogWarning(msg)
4453 def _ComputeAncillaryFiles(cluster, redist):
4454 """Compute files external to Ganeti which need to be consistent.
4456 @type redist: boolean
4457 @param redist: Whether to include files which need to be redistributed
4460 # Compute files for all nodes
4462 pathutils.SSH_KNOWN_HOSTS_FILE,
4463 pathutils.CONFD_HMAC_KEY,
4464 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4465 pathutils.SPICE_CERT_FILE,
4466 pathutils.SPICE_CACERT_FILE,
4467 pathutils.RAPI_USERS_FILE,
4471 # we need to ship at least the RAPI certificate
4472 files_all.add(pathutils.RAPI_CERT_FILE)
4474 files_all.update(pathutils.ALL_CERT_FILES)
4475 files_all.update(ssconf.SimpleStore().GetFileList())
4477 if cluster.modify_etc_hosts:
4478 files_all.add(pathutils.ETC_HOSTS)
4480 if cluster.use_external_mip_script:
4481 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4483 # Files which are optional; these must:
4484 # - be present in one other category as well
4485 # - either exist or not exist on all nodes of that category (mc, vm all)
4487 pathutils.RAPI_USERS_FILE,
4490 # Files which should only be on master candidates
4494 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4498 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4499 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4500 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4502 # Files which should only be on VM-capable nodes
4505 for hv_name in cluster.enabled_hypervisors
4506 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4510 for hv_name in cluster.enabled_hypervisors
4511 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4513 # Filenames in each category must be unique
4514 all_files_set = files_all | files_mc | files_vm
4515 assert (len(all_files_set) ==
4516 sum(map(len, [files_all, files_mc, files_vm]))), \
4517 "Found file listed in more than one file list"
4519 # Optional files must be present in one other category
4520 assert all_files_set.issuperset(files_opt), \
4521 "Optional file not in a different required list"
4523 # This one file should never ever be re-distributed via RPC
4524 assert not (redist and
4525 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4527 return (files_all, files_opt, files_mc, files_vm)
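# Editor's sketch of the invariants asserted above, using throwaway sets of
# hypothetical file names: the per-category sets must not overlap, and every
# optional file must also appear in one of the required categories.
files_all = set(["/etc/ssh/ssh_known_hosts", "/var/lib/ganeti/hmac.key"])
files_mc = set(["/var/lib/ganeti/config.data"])
files_vm = set(["/etc/xen/xend-config.sxp"])
files_opt = set(["/var/lib/ganeti/hmac.key"])

all_files_set = files_all | files_mc | files_vm
assert len(all_files_set) == sum(map(len, [files_all, files_mc, files_vm]))
assert all_files_set.issuperset(files_opt)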
4530 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4531 """Distribute additional files which are part of the cluster configuration.
4533 ConfigWriter takes care of distributing the config and ssconf files, but
4534 there are more files which should be distributed to all nodes. This function
4535 makes sure those are copied.
4537 @param lu: calling logical unit
4538 @param additional_nodes: list of nodes not in the config to distribute to
4539 @type additional_vm: boolean
4540 @param additional_vm: whether the additional nodes are vm-capable or not
4543 # Gather target nodes
4544 cluster = lu.cfg.GetClusterInfo()
4545 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4547 online_nodes = lu.cfg.GetOnlineNodeList()
4548 online_set = frozenset(online_nodes)
4549 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4551 if additional_nodes is not None:
4552 online_nodes.extend(additional_nodes)
4554 vm_nodes.extend(additional_nodes)
4556 # Never distribute to master node
4557 for nodelist in [online_nodes, vm_nodes]:
4558 if master_info.name in nodelist:
4559 nodelist.remove(master_info.name)
4562 (files_all, _, files_mc, files_vm) = \
4563 _ComputeAncillaryFiles(cluster, True)
4565 # Never re-distribute configuration file from here
4566 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4567 pathutils.CLUSTER_CONF_FILE in files_vm)
4568 assert not files_mc, "Master candidates not handled in this function"
4571 (online_nodes, files_all),
4572 (vm_nodes, files_vm),
4576 for (node_list, files) in filemap:
4577 for fname in files:
4578 _UploadHelper(lu, node_list, fname)
4581 class LUClusterRedistConf(NoHooksLU):
4582 """Force the redistribution of cluster configuration.
4584 This is a very simple LU.
4589 def ExpandNames(self):
4590 self.needed_locks = {
4591 locking.LEVEL_NODE: locking.ALL_SET,
4593 self.share_locks[locking.LEVEL_NODE] = 1
4595 def Exec(self, feedback_fn):
4596 """Redistribute the configuration.
4599 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4600 _RedistributeAncillaryFiles(self)
4603 class LUClusterActivateMasterIp(NoHooksLU):
4604 """Activate the master IP on the master node.
4607 def Exec(self, feedback_fn):
4608 """Activate the master IP.
4611 master_params = self.cfg.GetMasterNetworkParameters()
4612 ems = self.cfg.GetUseExternalMipScript()
4613 result = self.rpc.call_node_activate_master_ip(master_params.name,
4615 result.Raise("Could not activate the master IP")
4618 class LUClusterDeactivateMasterIp(NoHooksLU):
4619 """Deactivate the master IP on the master node.
4622 def Exec(self, feedback_fn):
4623 """Deactivate the master IP.
4626 master_params = self.cfg.GetMasterNetworkParameters()
4627 ems = self.cfg.GetUseExternalMipScript()
4628 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4630 result.Raise("Could not deactivate the master IP")
4633 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4634 """Sleep and poll for an instance's disk to sync.
4637 if not instance.disks or (disks is not None and not disks):
4640 disks = _ExpandCheckDisks(instance, disks)
4643 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4645 node = instance.primary_node
4648 lu.cfg.SetDiskID(dev, node)
4650 # TODO: Convert to utils.Retry
4653 degr_retries = 10 # in seconds, as we sleep 1 second each time
4657 cumul_degraded = False
4658 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4659 msg = rstats.fail_msg
4661 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4664 raise errors.RemoteError("Can't contact node %s for mirror data,"
4665 " aborting." % node)
4668 rstats = rstats.payload
4670 for i, mstat in enumerate(rstats):
4672 lu.LogWarning("Can't compute data for node %s/%s",
4673 node, disks[i].iv_name)
4676 cumul_degraded = (cumul_degraded or
4677 (mstat.is_degraded and mstat.sync_percent is None))
4678 if mstat.sync_percent is not None:
4680 if mstat.estimated_time is not None:
4681 rem_time = ("%s remaining (estimated)" %
4682 utils.FormatSeconds(mstat.estimated_time))
4683 max_time = mstat.estimated_time
4685 rem_time = "no time estimate"
4686 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4687 (disks[i].iv_name, mstat.sync_percent, rem_time))
4689 # if we're done but degraded, let's do a few small retries, to
4690 # make sure we see a stable and not transient situation; therefore
4691 # we force restart of the loop
4692 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4693 logging.info("Degraded disks found, %d retries left", degr_retries)
4701 time.sleep(min(60, max_time))
4704 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4705 return not cumul_degraded
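# Editor's compressed sketch of the waiting strategy above (pure Python, with
# a fake poll function instead of the blockdev RPC): keep polling while the
# mirrors are still syncing, and once everything reports "done but degraded"
# allow only a limited number of extra polls so a transient degraded state
# does not stall the caller forever.
def wait_for_sync(poll, degr_retries=10):
    while True:
        done, degraded = poll()
        if done and not degraded:
            return True
        if done and degraded:
            if degr_retries <= 0:
                return False
            degr_retries -= 1

states = iter([(False, True),   # still syncing
               (True, True),    # done but (transiently) degraded
               (True, False)])  # fully in sync
assert wait_for_sync(lambda: next(states))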
4708 def _BlockdevFind(lu, node, dev, instance):
4709 """Wrapper around call_blockdev_find to annotate diskparams.
4711 @param lu: A reference to the lu object
4712 @param node: The node to call out
4713 @param dev: The device to find
4714 @param instance: The instance object the device belongs to
4715 @returns The result of the rpc call
4718 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4719 return lu.rpc.call_blockdev_find(node, disk)
4722 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4723 """Wrapper around L{_CheckDiskConsistencyInner}.
4726 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4727 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4731 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4733 """Check that mirrors are not degraded.
4735 @attention: The device has to be annotated already.
4737 The ldisk parameter, if True, will change the test from the
4738 is_degraded attribute (which represents overall non-ok status for
4739 the device(s)) to the ldisk (representing the local storage status).
4742 lu.cfg.SetDiskID(dev, node)
4746 if on_primary or dev.AssembleOnSecondary():
4747 rstats = lu.rpc.call_blockdev_find(node, dev)
4748 msg = rstats.fail_msg
4750 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4752 elif not rstats.payload:
4753 lu.LogWarning("Can't find disk on node %s", node)
4757 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4759 result = result and not rstats.payload.is_degraded
4762 for child in dev.children:
4763 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4769 class LUOobCommand(NoHooksLU):
4770 """Logical unit for OOB handling.
4774 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4776 def ExpandNames(self):
4777 """Gather locks we need.
4780 if self.op.node_names:
4781 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4782 lock_names = self.op.node_names
4784 lock_names = locking.ALL_SET
4786 self.needed_locks = {
4787 locking.LEVEL_NODE: lock_names,
4790 def CheckPrereq(self):
4791 """Check prerequisites.
4794 - the node exists in the configuration
4797 Any errors are signaled by raising errors.OpPrereqError.
4801 self.master_node = self.cfg.GetMasterNode()
4803 assert self.op.power_delay >= 0.0
4805 if self.op.node_names:
4806 if (self.op.command in self._SKIP_MASTER and
4807 self.master_node in self.op.node_names):
4808 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4809 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4811 if master_oob_handler:
4812 additional_text = ("run '%s %s %s' if you want to operate on the"
4813 " master regardless") % (master_oob_handler,
4817 additional_text = "it does not support out-of-band operations"
4819 raise errors.OpPrereqError(("Operating on the master node %s is not"
4820 " allowed for %s; %s") %
4821 (self.master_node, self.op.command,
4822 additional_text), errors.ECODE_INVAL)
4824 self.op.node_names = self.cfg.GetNodeList()
4825 if self.op.command in self._SKIP_MASTER:
4826 self.op.node_names.remove(self.master_node)
4828 if self.op.command in self._SKIP_MASTER:
4829 assert self.master_node not in self.op.node_names
4831 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4833 raise errors.OpPrereqError("Node %s not found" % node_name,
4836 self.nodes.append(node)
4838 if (not self.op.ignore_status and
4839 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4840 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4841 " not marked offline") % node_name,
4844 def Exec(self, feedback_fn):
4845 """Execute OOB and return result if we expect any.
4848 master_node = self.master_node
4851 for idx, node in enumerate(utils.NiceSort(self.nodes,
4852 key=lambda node: node.name)):
4853 node_entry = [(constants.RS_NORMAL, node.name)]
4854 ret.append(node_entry)
4856 oob_program = _SupportsOob(self.cfg, node)
4859 node_entry.append((constants.RS_UNAVAIL, None))
4862 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4863 self.op.command, oob_program, node.name)
4864 result = self.rpc.call_run_oob(master_node, oob_program,
4865 self.op.command, node.name,
4869 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4870 node.name, result.fail_msg)
4871 node_entry.append((constants.RS_NODATA, None))
4874 self._CheckPayload(result)
4875 except errors.OpExecError, err:
4876 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4878 node_entry.append((constants.RS_NODATA, None))
4880 if self.op.command == constants.OOB_HEALTH:
4881 # For health we should log important events
4882 for item, status in result.payload:
4883 if status in [constants.OOB_STATUS_WARNING,
4884 constants.OOB_STATUS_CRITICAL]:
4885 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4886 item, node.name, status)
4888 if self.op.command == constants.OOB_POWER_ON:
4890 elif self.op.command == constants.OOB_POWER_OFF:
4891 node.powered = False
4892 elif self.op.command == constants.OOB_POWER_STATUS:
4893 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4894 if powered != node.powered:
4895 logging.warning(("Recorded power state (%s) of node '%s' does not"
4896 " match actual power state (%s)"), node.powered,
4899 # For configuration changing commands we should update the node
4900 if self.op.command in (constants.OOB_POWER_ON,
4901 constants.OOB_POWER_OFF):
4902 self.cfg.Update(node, feedback_fn)
4904 node_entry.append((constants.RS_NORMAL, result.payload))
4906 if (self.op.command == constants.OOB_POWER_ON and
4907 idx < len(self.nodes) - 1):
4908 time.sleep(self.op.power_delay)
4912 def _CheckPayload(self, result):
4913 """Checks if the payload is valid.
4915 @param result: RPC result
4916 @raises errors.OpExecError: If payload is not valid
4920 if self.op.command == constants.OOB_HEALTH:
4921 if not isinstance(result.payload, list):
4922 errs.append("command 'health' is expected to return a list but got %s" %
4923 type(result.payload))
4925 for item, status in result.payload:
4926 if status not in constants.OOB_STATUSES:
4927 errs.append("health item '%s' has invalid status '%s'" %
4930 if self.op.command == constants.OOB_POWER_STATUS:
4931 if not isinstance(result.payload, dict):
4932 errs.append("power-status is expected to return a dict but got %s" %
4933 type(result.payload))
4935 if self.op.command in [
4936 constants.OOB_POWER_ON,
4937 constants.OOB_POWER_OFF,
4938 constants.OOB_POWER_CYCLE,
4940 if result.payload is not None:
4941 errs.append("%s is expected to not return payload but got '%s'" %
4942 (self.op.command, result.payload))
4945 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4946 utils.CommaJoin(errs))
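# Editor's sketch of the per-command payload expectations enforced above,
# using plain strings and data instead of the constants module (command names
# and payloads here are illustrative): "health" must return a list of
# (item, status) pairs, "power-status" a dict, and the power on/off/cycle
# commands must return no payload at all.
def check_payload(command, payload):
    if command == "health":
        return isinstance(payload, list)
    if command == "power-status":
        return isinstance(payload, dict)
    if command in ("power-on", "power-off", "power-cycle"):
        return payload is None
    return False

assert check_payload("health", [("temperature", "OK")])
assert check_payload("power-status", {"powered": True})
assert check_payload("power-on", None)
assert not check_payload("power-on", {"unexpected": 1})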
4949 class _OsQuery(_QueryBase):
4950 FIELDS = query.OS_FIELDS
4952 def ExpandNames(self, lu):
4953 # Lock all nodes in shared mode
4954 # Temporary removal of locks, should be reverted later
4955 # TODO: reintroduce locks when they are lighter-weight
4956 lu.needed_locks = {}
4957 #self.share_locks[locking.LEVEL_NODE] = 1
4958 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4960 # The following variables interact with _QueryBase._GetNames
4962 self.wanted = self.names
4964 self.wanted = locking.ALL_SET
4966 self.do_locking = self.use_locking
4968 def DeclareLocks(self, lu, level):
4972 def _DiagnoseByOS(rlist):
4973 """Remaps a per-node return list into a per-os per-node dictionary
4975 @param rlist: a map with node names as keys and OS objects as values
4978 @return: a dictionary with osnames as keys and as value another
4979 map, with nodes as keys and tuples of (path, status, diagnose,
4980 variants, parameters, api_versions) as values, eg::
4982 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4983 (/srv/..., False, "invalid api")],
4984 "node2": [(/srv/..., True, "", [], [])]}
4989 # we build here the list of nodes that didn't fail the RPC (at RPC
4990 # level), so that nodes with a non-responding node daemon don't
4991 # make all OSes invalid
4992 good_nodes = [node_name for node_name in rlist
4993 if not rlist[node_name].fail_msg]
4994 for node_name, nr in rlist.items():
4995 if nr.fail_msg or not nr.payload:
4996 continue
4997 for (name, path, status, diagnose, variants,
4998 params, api_versions) in nr.payload:
4999 if name not in all_os:
5000 # build a list of nodes for this os containing empty lists
5001 # for each node in node_list
5002 all_os[name] = {}
5003 for nname in good_nodes:
5004 all_os[name][nname] = []
5005 # convert params from [name, help] to (name, help)
5006 params = [tuple(v) for v in params]
5007 all_os[name][node_name].append((path, status, diagnose,
5008 variants, params, api_versions))
5010 return all_os
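# Editor's toy version of the remapping above, with plain dicts standing in
# for the RPC results (node names, OS names and tuple fields are made up): a
# per-node list of OS entries becomes a per-OS, per-node dictionary, and nodes
# whose RPC failed (modelled here as an empty payload) do not appear in the
# per-OS node maps.
rlist = {"node1": [("debian", "/srv/ganeti/os", True)],
         "node2": []}                       # failed / empty RPC answer
good_nodes = [name for name, payload in rlist.items() if payload]

all_os = {}
for node_name, payload in rlist.items():
    for (name, path, status) in payload:
        node_map = all_os.setdefault(name,
                                     dict((n, []) for n in good_nodes))
        node_map[node_name].append((path, status))

assert all_os == {"debian": {"node1": [("/srv/ganeti/os", True)]}}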
5011 def _GetQueryData(self, lu):
5012 """Computes the list of OSes and their attributes.
5015 # Locking is not used
5016 assert not (compat.any(lu.glm.is_owned(level)
5017 for level in locking.LEVELS
5018 if level != locking.LEVEL_CLUSTER) or
5019 self.do_locking or self.use_locking)
5021 valid_nodes = [node.name
5022 for node in lu.cfg.GetAllNodesInfo().values()
5023 if not node.offline and node.vm_capable]
5024 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5025 cluster = lu.cfg.GetClusterInfo()
5029 for (os_name, os_data) in pol.items():
5030 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5031 hidden=(os_name in cluster.hidden_os),
5032 blacklisted=(os_name in cluster.blacklisted_os))
5036 api_versions = set()
5038 for idx, osl in enumerate(os_data.values()):
5039 info.valid = bool(info.valid and osl and osl[0][1])
5043 (node_variants, node_params, node_api) = osl[0][3:6]
5046 variants.update(node_variants)
5047 parameters.update(node_params)
5048 api_versions.update(node_api)
5050 # Filter out inconsistent values
5051 variants.intersection_update(node_variants)
5052 parameters.intersection_update(node_params)
5053 api_versions.intersection_update(node_api)
5055 info.variants = list(variants)
5056 info.parameters = list(parameters)
5057 info.api_versions = list(api_versions)
5059 data[os_name] = info
5061 # Prepare data in requested order
5062 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5066 class LUOsDiagnose(NoHooksLU):
5067 """Logical unit for OS diagnose/query.
5073 def _BuildFilter(fields, names):
5074 """Builds a filter for querying OSes.
5077 name_filter = qlang.MakeSimpleFilter("name", names)
5079 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5080 # respective field is not requested
5081 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5082 for fname in ["hidden", "blacklisted"]
5083 if fname not in fields]
5084 if "valid" not in fields:
5085 status_filter.append([qlang.OP_TRUE, "valid"])
5087 if status_filter:
5088 status_filter.insert(0, qlang.OP_AND)
5089 else:
5090 status_filter = None
5092 if name_filter and status_filter:
5093 return [qlang.OP_AND, name_filter, status_filter]
5097 return status_filter
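# Editor's sketch of the filter shapes produced above, with the query-language
# operators written as plain strings ("&", "!", "?") instead of the qlang
# constants: hidden, blacklisted and invalid OSes are filtered out unless the
# corresponding field was explicitly requested.
def build_status_filter(fields):
    parts = [["!", ["?", fname]] for fname in ("hidden", "blacklisted")
             if fname not in fields]
    if "valid" not in fields:
        parts.append(["?", "valid"])
    if parts:
        return ["&"] + parts
    return None

assert build_status_filter(["name"]) == \
    ["&", ["!", ["?", "hidden"]], ["!", ["?", "blacklisted"]], ["?", "valid"]]
assert build_status_filter(["name", "hidden", "blacklisted", "valid"]) is None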
5099 def CheckArguments(self):
5100 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5101 self.op.output_fields, False)
5103 def ExpandNames(self):
5104 self.oq.ExpandNames(self)
5106 def Exec(self, feedback_fn):
5107 return self.oq.OldStyleQuery(self)
5110 class LUNodeRemove(LogicalUnit):
5111 """Logical unit for removing a node.
5114 HPATH = "node-remove"
5115 HTYPE = constants.HTYPE_NODE
5117 def BuildHooksEnv(self):
5122 "OP_TARGET": self.op.node_name,
5123 "NODE_NAME": self.op.node_name,
5126 def BuildHooksNodes(self):
5127 """Build hooks nodes.
5129 This doesn't run on the target node in the pre phase as a failed
5130 node would then be impossible to remove.
5133 all_nodes = self.cfg.GetNodeList()
5135 all_nodes.remove(self.op.node_name)
5138 return (all_nodes, all_nodes)
5140 def CheckPrereq(self):
5141 """Check prerequisites.
5144 - the node exists in the configuration
5145 - it does not have primary or secondary instances
5146 - it's not the master
5148 Any errors are signaled by raising errors.OpPrereqError.
5151 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5152 node = self.cfg.GetNodeInfo(self.op.node_name)
5153 assert node is not None
5155 masternode = self.cfg.GetMasterNode()
5156 if node.name == masternode:
5157 raise errors.OpPrereqError("Node is the master node, failover to another"
5158 " node is required", errors.ECODE_INVAL)
5160 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5161 if node.name in instance.all_nodes:
5162 raise errors.OpPrereqError("Instance %s is still running on the node,"
5163 " please remove first" % instance_name,
5165 self.op.node_name = node.name
5168 def Exec(self, feedback_fn):
5169 """Removes the node from the cluster.
5173 logging.info("Stopping the node daemon and removing configs from node %s",
5176 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5178 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5181 # Promote nodes to master candidate as needed
5182 _AdjustCandidatePool(self, exceptions=[node.name])
5183 self.context.RemoveNode(node.name)
5185 # Run post hooks on the node before it's removed
5186 _RunPostHook(self, node.name)
5188 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5189 msg = result.fail_msg
5191 self.LogWarning("Errors encountered on the remote node while leaving"
5192 " the cluster: %s", msg)
5194 # Remove node from our /etc/hosts
5195 if self.cfg.GetClusterInfo().modify_etc_hosts:
5196 master_node = self.cfg.GetMasterNode()
5197 result = self.rpc.call_etc_hosts_modify(master_node,
5198 constants.ETC_HOSTS_REMOVE,
5200 result.Raise("Can't update hosts file with new host data")
5201 _RedistributeAncillaryFiles(self)
5204 class _NodeQuery(_QueryBase):
5205 FIELDS = query.NODE_FIELDS
5207 def ExpandNames(self, lu):
5208 lu.needed_locks = {}
5209 lu.share_locks = _ShareAll()
5212 self.wanted = _GetWantedNodes(lu, self.names)
5214 self.wanted = locking.ALL_SET
5216 self.do_locking = (self.use_locking and
5217 query.NQ_LIVE in self.requested_data)
5220 # If any non-static field is requested we need to lock the nodes
5221 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5223 def DeclareLocks(self, lu, level):
5226 def _GetQueryData(self, lu):
5227 """Computes the list of nodes and their attributes.
5230 all_info = lu.cfg.GetAllNodesInfo()
5232 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5234 # Gather data as requested
5235 if query.NQ_LIVE in self.requested_data:
5236 # filter out non-vm_capable nodes
5237 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5239 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5240 [lu.cfg.GetHypervisorType()])
5241 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5242 for (name, nresult) in node_data.items()
5243 if not nresult.fail_msg and nresult.payload)
5247 if query.NQ_INST in self.requested_data:
5248 node_to_primary = dict([(name, set()) for name in nodenames])
5249 node_to_secondary = dict([(name, set()) for name in nodenames])
5251 inst_data = lu.cfg.GetAllInstancesInfo()
5253 for inst in inst_data.values():
5254 if inst.primary_node in node_to_primary:
5255 node_to_primary[inst.primary_node].add(inst.name)
5256 for secnode in inst.secondary_nodes:
5257 if secnode in node_to_secondary:
5258 node_to_secondary[secnode].add(inst.name)
5260 node_to_primary = None
5261 node_to_secondary = None
5263 if query.NQ_OOB in self.requested_data:
5264 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5265 for name, node in all_info.iteritems())
5269 if query.NQ_GROUP in self.requested_data:
5270 groups = lu.cfg.GetAllNodeGroupsInfo()
5274 return query.NodeQueryData([all_info[name] for name in nodenames],
5275 live_data, lu.cfg.GetMasterNode(),
5276 node_to_primary, node_to_secondary, groups,
5277 oob_support, lu.cfg.GetClusterInfo())
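# Editor's toy illustration of the node-to-instance maps built above
# (hypothetical instance and node names, plain tuples instead of config
# objects): each node accumulates the set of instances for which it is the
# primary and the set for which it is a secondary.
instances = [("inst1", "node1", ["node2"]),
             ("inst2", "node2", [])]
node_to_primary = dict((name, set()) for name in ["node1", "node2"])
node_to_secondary = dict((name, set()) for name in ["node1", "node2"])

for name, pnode, snodes in instances:
    if pnode in node_to_primary:
        node_to_primary[pnode].add(name)
    for sec in snodes:
        if sec in node_to_secondary:
            node_to_secondary[sec].add(name)

assert node_to_primary["node2"] == set(["inst2"])
assert node_to_secondary["node2"] == set(["inst1"])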
5280 class LUNodeQuery(NoHooksLU):
5281 """Logical unit for querying nodes.
5284 # pylint: disable=W0142
5287 def CheckArguments(self):
5288 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5289 self.op.output_fields, self.op.use_locking)
5291 def ExpandNames(self):
5292 self.nq.ExpandNames(self)
5294 def DeclareLocks(self, level):
5295 self.nq.DeclareLocks(self, level)
5297 def Exec(self, feedback_fn):
5298 return self.nq.OldStyleQuery(self)
5301 class LUNodeQueryvols(NoHooksLU):
5302 """Logical unit for getting volumes on node(s).
5306 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5307 _FIELDS_STATIC = utils.FieldSet("node")
5309 def CheckArguments(self):
5310 _CheckOutputFields(static=self._FIELDS_STATIC,
5311 dynamic=self._FIELDS_DYNAMIC,
5312 selected=self.op.output_fields)
5314 def ExpandNames(self):
5315 self.share_locks = _ShareAll()
5316 self.needed_locks = {}
5318 if not self.op.nodes:
5319 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5321 self.needed_locks[locking.LEVEL_NODE] = \
5322 _GetWantedNodes(self, self.op.nodes)
5324 def Exec(self, feedback_fn):
5325 """Computes the list of nodes and their attributes.
5328 nodenames = self.owned_locks(locking.LEVEL_NODE)
5329 volumes = self.rpc.call_node_volumes(nodenames)
5331 ilist = self.cfg.GetAllInstancesInfo()
5332 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5335 for node in nodenames:
5336 nresult = volumes[node]
5339 msg = nresult.fail_msg
5341 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5344 node_vols = sorted(nresult.payload,
5345 key=operator.itemgetter("dev"))
5347 for vol in node_vols:
5349 for field in self.op.output_fields:
5352 elif field == "phys":
5356 elif field == "name":
5358 elif field == "size":
5359 val = int(float(vol["size"]))
5360 elif field == "instance":
5361 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5363 raise errors.ParameterError(field)
5364 node_output.append(str(val))
5366 output.append(node_output)
5371 class LUNodeQueryStorage(NoHooksLU):
5372 """Logical unit for getting information on storage units on node(s).
5375 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5378 def CheckArguments(self):
5379 _CheckOutputFields(static=self._FIELDS_STATIC,
5380 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5381 selected=self.op.output_fields)
5383 def ExpandNames(self):
5384 self.share_locks = _ShareAll()
5385 self.needed_locks = {}
5388 self.needed_locks[locking.LEVEL_NODE] = \
5389 _GetWantedNodes(self, self.op.nodes)
5391 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5393 def Exec(self, feedback_fn):
5394 """Computes the list of nodes and their attributes.
5397 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5399 # Always get name to sort by
5400 if constants.SF_NAME in self.op.output_fields:
5401 fields = self.op.output_fields[:]
5403 fields = [constants.SF_NAME] + self.op.output_fields
5405 # Never ask for node or type as it's only known to the LU
5406 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5407 while extra in fields:
5408 fields.remove(extra)
5410 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5411 name_idx = field_idx[constants.SF_NAME]
5413 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5414 data = self.rpc.call_storage_list(self.nodes,
5415 self.op.storage_type, st_args,
5416 self.op.name, fields)
5420 for node in utils.NiceSort(self.nodes):
5421 nresult = data[node]
5425 msg = nresult.fail_msg
5427 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5430 rows = dict([(row[name_idx], row) for row in nresult.payload])
5432 for name in utils.NiceSort(rows.keys()):
5437 for field in self.op.output_fields:
5438 if field == constants.SF_NODE:
5440 elif field == constants.SF_TYPE:
5441 val = self.op.storage_type
5442 elif field in field_idx:
5443 val = row[field_idx[field]]
5445 raise errors.ParameterError(field)
5454 class _InstanceQuery(_QueryBase):
5455 FIELDS = query.INSTANCE_FIELDS
5457 def ExpandNames(self, lu):
5458 lu.needed_locks = {}
5459 lu.share_locks = _ShareAll()
5462 self.wanted = _GetWantedInstances(lu, self.names)
5464 self.wanted = locking.ALL_SET
5466 self.do_locking = (self.use_locking and
5467 query.IQ_LIVE in self.requested_data)
5469 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5470 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5471 lu.needed_locks[locking.LEVEL_NODE] = []
5472 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5474 self.do_grouplocks = (self.do_locking and
5475 query.IQ_NODES in self.requested_data)
5477 def DeclareLocks(self, lu, level):
5479 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5480 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5482 # Lock all groups used by instances optimistically; this requires going
5483 # via the node before it's locked, requiring verification later on
5484 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5486 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5487 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5488 elif level == locking.LEVEL_NODE:
5489 lu._LockInstancesNodes() # pylint: disable=W0212
5492 def _CheckGroupLocks(lu):
5493 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5494 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5496 # Check if node groups for locked instances are still correct
5497 for instance_name in owned_instances:
5498 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5500 def _GetQueryData(self, lu):
5501 """Computes the list of instances and their attributes.
5504 if self.do_grouplocks:
5505 self._CheckGroupLocks(lu)
5507 cluster = lu.cfg.GetClusterInfo()
5508 all_info = lu.cfg.GetAllInstancesInfo()
5510 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5512 instance_list = [all_info[name] for name in instance_names]
5513 nodes = frozenset(itertools.chain(*(inst.all_nodes
5514 for inst in instance_list)))
5515 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5518 wrongnode_inst = set()
5520 # Gather data as requested
5521 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5523 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5525 result = node_data[name]
5527 # offline nodes will be in both lists
5528 assert result.fail_msg
5529 offline_nodes.append(name)
5531 bad_nodes.append(name)
5532 elif result.payload:
5533 for inst in result.payload:
5534 if inst in all_info:
5535 if all_info[inst].primary_node == name:
5536 live_data.update(result.payload)
5538 wrongnode_inst.add(inst)
5540 # orphan instance; we don't list it here as we don't
5541 # handle this case yet in the output of instance listing
5542 logging.warning("Orphan instance '%s' found on node %s",
5544 # else no instance is alive
5548 if query.IQ_DISKUSAGE in self.requested_data:
5549 gmi = ganeti.masterd.instance
5550 disk_usage = dict((inst.name,
5551 gmi.ComputeDiskSize(inst.disk_template,
5552 [{constants.IDISK_SIZE: disk.size}
5553 for disk in inst.disks]))
5554 for inst in instance_list)
5558 if query.IQ_CONSOLE in self.requested_data:
5560 for inst in instance_list:
5561 if inst.name in live_data:
5562 # Instance is running
5563 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5565 consinfo[inst.name] = None
5566 assert set(consinfo.keys()) == set(instance_names)
5570 if query.IQ_NODES in self.requested_data:
5571 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5573 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5574 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5575 for uuid in set(map(operator.attrgetter("group"),
5581 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5582 disk_usage, offline_nodes, bad_nodes,
5583 live_data, wrongnode_inst, consinfo,
5587 class LUQuery(NoHooksLU):
5588 """Query for resources/items of a certain kind.
5591 # pylint: disable=W0142
5594 def CheckArguments(self):
5595 qcls = _GetQueryImplementation(self.op.what)
5597 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5599 def ExpandNames(self):
5600 self.impl.ExpandNames(self)
5602 def DeclareLocks(self, level):
5603 self.impl.DeclareLocks(self, level)
5605 def Exec(self, feedback_fn):
5606 return self.impl.NewStyleQuery(self)
5609 class LUQueryFields(NoHooksLU):
5610 """Query for resources/items of a certain kind.
5613 # pylint: disable=W0142
5616 def CheckArguments(self):
5617 self.qcls = _GetQueryImplementation(self.op.what)
5619 def ExpandNames(self):
5620 self.needed_locks = {}
5622 def Exec(self, feedback_fn):
5623 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5626 class LUNodeModifyStorage(NoHooksLU):
5627 """Logical unit for modifying a storage volume on a node.
5632 def CheckArguments(self):
5633 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5635 storage_type = self.op.storage_type
5638 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5640 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5641 " modified" % storage_type,
5644 diff = set(self.op.changes.keys()) - modifiable
5646 raise errors.OpPrereqError("The following fields can not be modified for"
5647 " storage units of type '%s': %r" %
5648 (storage_type, list(diff)),
5651 def ExpandNames(self):
5652 self.needed_locks = {
5653 locking.LEVEL_NODE: self.op.node_name,
5656 def Exec(self, feedback_fn):
5657 """Modifies a storage unit on the given node.
5660 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5661 result = self.rpc.call_storage_modify(self.op.node_name,
5662 self.op.storage_type, st_args,
5663 self.op.name, self.op.changes)
5664 result.Raise("Failed to modify storage unit '%s' on %s" %
5665 (self.op.name, self.op.node_name))
5668 class LUNodeAdd(LogicalUnit):
5669 """Logical unit for adding a node to the cluster.
5673 HTYPE = constants.HTYPE_NODE
5674 _NFLAGS = ["master_capable", "vm_capable"]
5676 def CheckArguments(self):
5677 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5678 # validate/normalize the node name
5679 self.hostname = netutils.GetHostname(name=self.op.node_name,
5680 family=self.primary_ip_family)
5681 self.op.node_name = self.hostname.name
5683 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5684 raise errors.OpPrereqError("Cannot readd the master node",
5687 if self.op.readd and self.op.group:
5688 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5689 " being readded", errors.ECODE_INVAL)
5691 def BuildHooksEnv(self):
5694 This will run on all nodes before, and on all nodes + the new node after.
5698 "OP_TARGET": self.op.node_name,
5699 "NODE_NAME": self.op.node_name,
5700 "NODE_PIP": self.op.primary_ip,
5701 "NODE_SIP": self.op.secondary_ip,
5702 "MASTER_CAPABLE": str(self.op.master_capable),
5703 "VM_CAPABLE": str(self.op.vm_capable),
5706 def BuildHooksNodes(self):
5707 """Build hooks nodes.
5710 # Exclude added node
5711 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5712 post_nodes = pre_nodes + [self.op.node_name, ]
5714 return (pre_nodes, post_nodes)
5716 def CheckPrereq(self):
5717 """Check prerequisites.
5720 - the new node is not already in the config
5722 - its parameters (single/dual homed) match the cluster
5724 Any errors are signaled by raising errors.OpPrereqError.
5728 hostname = self.hostname
5729 node = hostname.name
5730 primary_ip = self.op.primary_ip = hostname.ip
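# If no secondary IP is given we default to a single-homed setup below,
# reusing the primary IP as the secondary one (IPv4 clusters only).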
5731 if self.op.secondary_ip is None:
5732 if self.primary_ip_family == netutils.IP6Address.family:
5733 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5734 " IPv4 address must be given as secondary",
5736 self.op.secondary_ip = primary_ip
5738 secondary_ip = self.op.secondary_ip
5739 if not netutils.IP4Address.IsValid(secondary_ip):
5740 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5741 " address" % secondary_ip, errors.ECODE_INVAL)
5743 node_list = cfg.GetNodeList()
5744 if not self.op.readd and node in node_list:
5745 raise errors.OpPrereqError("Node %s is already in the configuration" %
5746 node, errors.ECODE_EXISTS)
5747 elif self.op.readd and node not in node_list:
5748 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5751 self.changed_primary_ip = False
5753 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5754 if self.op.readd and node == existing_node_name:
5755 if existing_node.secondary_ip != secondary_ip:
5756 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5757 " address configuration as before",
5759 if existing_node.primary_ip != primary_ip:
5760 self.changed_primary_ip = True
5764 if (existing_node.primary_ip == primary_ip or
5765 existing_node.secondary_ip == primary_ip or
5766 existing_node.primary_ip == secondary_ip or
5767 existing_node.secondary_ip == secondary_ip):
5768 raise errors.OpPrereqError("New node ip address(es) conflict with"
5769 " existing node %s" % existing_node.name,
5770 errors.ECODE_NOTUNIQUE)
5772 # After this 'if' block, None is no longer a valid value for the
5773 # _capable op attributes
5774 if self.op.readd:
5775 old_node = self.cfg.GetNodeInfo(node)
5776 assert old_node is not None, "Can't retrieve locked node %s" % node
5777 for attr in self._NFLAGS:
5778 if getattr(self.op, attr) is None:
5779 setattr(self.op, attr, getattr(old_node, attr))
5780 else:
5781 for attr in self._NFLAGS:
5782 if getattr(self.op, attr) is None:
5783 setattr(self.op, attr, True)
5785 if self.op.readd and not self.op.vm_capable:
5786 pri, sec = cfg.GetNodeInstances(node)
5788 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5789 " flag set to false, but it already holds"
5790 " instances" % node,
5793 # check that the type of the node (single versus dual homed) is the
5794 # same as for the master
5795 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5796 master_singlehomed = myself.secondary_ip == myself.primary_ip
5797 newbie_singlehomed = secondary_ip == primary_ip
5798 if master_singlehomed != newbie_singlehomed:
5799 if master_singlehomed:
5800 raise errors.OpPrereqError("The master has no secondary ip but the"
5801 " new node has one",
5804 raise errors.OpPrereqError("The master has a secondary ip but the"
5805 " new node doesn't have one",
5808 # checks reachability
5809 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5810 raise errors.OpPrereqError("Node not reachable by ping",
5811 errors.ECODE_ENVIRON)
5813 if not newbie_singlehomed:
5814 # check reachability from my secondary ip to newbie's secondary ip
5815 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5816 source=myself.secondary_ip):
5817 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5818 " based ping to node daemon port",
5819 errors.ECODE_ENVIRON)
5826 if self.op.master_capable:
5827 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5829 self.master_candidate = False
5832 self.new_node = old_node
5834 node_group = cfg.LookupNodeGroup(self.op.group)
5835 self.new_node = objects.Node(name=node,
5836 primary_ip=primary_ip,
5837 secondary_ip=secondary_ip,
5838 master_candidate=self.master_candidate,
5839 offline=False, drained=False,
5842 if self.op.ndparams:
5843 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5845 if self.op.hv_state:
5846 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5848 if self.op.disk_state:
5849 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5851 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5852 # it a property on the base class.
5853 result = rpc.DnsOnlyRunner().call_version([node])[node]
5854 result.Raise("Can't get version information from node %s" % node)
5855 if constants.PROTOCOL_VERSION == result.payload:
5856 logging.info("Communication to node %s fine, sw version %s match",
5857 node, result.payload)
5858 else:
5859 raise errors.OpPrereqError("Version mismatch master version %s,"
5860 " node version %s" %
5861 (constants.PROTOCOL_VERSION, result.payload),
5862 errors.ECODE_ENVIRON)
5864 def Exec(self, feedback_fn):
5865 """Adds the new node to the cluster.
5868 new_node = self.new_node
5869 node = new_node.name
5871 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5874 # We are adding a new node, so we assume it's powered
5875 new_node.powered = True
5877 # for re-adds, reset the offline/drained/master-candidate flags;
5878 # we need to reset here, otherwise offline would prevent RPC calls
5879 # later in the procedure; this also means that if the re-add
5880 # fails, we are left with a non-offlined, broken node
5881 if self.op.readd:
5882 new_node.drained = new_node.offline = False # pylint: disable=W0201
5883 self.LogInfo("Readding a node, the offline/drained flags were reset")
5884 # if we demote the node, we do cleanup later in the procedure
5885 new_node.master_candidate = self.master_candidate
5886 if self.changed_primary_ip:
5887 new_node.primary_ip = self.op.primary_ip
5889 # copy the master/vm_capable flags
5890 for attr in self._NFLAGS:
5891 setattr(new_node, attr, getattr(self.op, attr))
5893 # notify the user about any possible mc promotion
5894 if new_node.master_candidate:
5895 self.LogInfo("Node will be a master candidate")
5897 if self.op.ndparams:
5898 new_node.ndparams = self.op.ndparams
5900 new_node.ndparams = {}
5902 if self.op.hv_state:
5903 new_node.hv_state_static = self.new_hv_state
5905 if self.op.disk_state:
5906 new_node.disk_state_static = self.new_disk_state
5908 # Add node to our /etc/hosts, and add key to known_hosts
5909 if self.cfg.GetClusterInfo().modify_etc_hosts:
5910 master_node = self.cfg.GetMasterNode()
5911 result = self.rpc.call_etc_hosts_modify(master_node,
5912 constants.ETC_HOSTS_ADD,
5915 result.Raise("Can't update hosts file with new host data")
5917 if new_node.secondary_ip != new_node.primary_ip:
5918 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5921 node_verify_list = [self.cfg.GetMasterNode()]
5922 node_verify_param = {
5923 constants.NV_NODELIST: ([node], {}),
5924 # TODO: do a node-net-test as well?
5925 }
5927 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5928 self.cfg.GetClusterName())
5929 for verifier in node_verify_list:
5930 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5931 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5932 if nl_payload:
5933 for failed in nl_payload:
5934 feedback_fn("ssh/hostname verification failed"
5935 " (checking from %s): %s" %
5936 (verifier, nl_payload[failed]))
5937 raise errors.OpExecError("ssh/hostname verification failed")
5939 if self.op.readd:
5940 _RedistributeAncillaryFiles(self)
5941 self.context.ReaddNode(new_node)
5942 # make sure we redistribute the config
5943 self.cfg.Update(new_node, feedback_fn)
5944 # and make sure the new node will not have old files around
5945 if not new_node.master_candidate:
5946 result = self.rpc.call_node_demote_from_mc(new_node.name)
5947 msg = result.fail_msg
5949 self.LogWarning("Node failed to demote itself from master"
5950 " candidate status: %s" % msg)
5951 else:
5952 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5953 additional_vm=self.op.vm_capable)
5954 self.context.AddNode(new_node, self.proc.GetECId())
5957 class LUNodeSetParams(LogicalUnit):
5958 """Modifies the parameters of a node.
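Typically invoked through "gnt-node modify" to change the node role flags
(master candidate, drained, offline), the capability flags, the secondary
IP and the node parameters.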
5960 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5961 to the node role (as _ROLE_*)
5962 @cvar _R2F: a dictionary from node role to tuples of flags
5963 @cvar _FLAGS: a list of attribute names corresponding to the flags
5966 HPATH = "node-modify"
5967 HTYPE = constants.HTYPE_NODE
5969 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5970 _F2R = {
5971 (True, False, False): _ROLE_CANDIDATE,
5972 (False, True, False): _ROLE_DRAINED,
5973 (False, False, True): _ROLE_OFFLINE,
5974 (False, False, False): _ROLE_REGULAR,
5975 }
5976 _R2F = dict((v, k) for k, v in _F2R.items())
5977 _FLAGS = ["master_candidate", "drained", "offline"]
5979 def CheckArguments(self):
5980 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5981 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5982 self.op.master_capable, self.op.vm_capable,
5983 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5984 self.op.disk_state]
5985 if all_mods.count(None) == len(all_mods):
5986 raise errors.OpPrereqError("Please pass at least one modification",
5988 if all_mods.count(True) > 1:
5989 raise errors.OpPrereqError("Can't set the node into more than one"
5990 " state at the same time",
5993 # Boolean value that tells us whether we might be demoting from MC
5994 self.might_demote = (self.op.master_candidate is False or
5995 self.op.offline is True or
5996 self.op.drained is True or
5997 self.op.master_capable is False)
5999 if self.op.secondary_ip:
6000 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6001 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6002 " address" % self.op.secondary_ip,
6005 self.lock_all = self.op.auto_promote and self.might_demote
6006 self.lock_instances = self.op.secondary_ip is not None
6008 def _InstanceFilter(self, instance):
6009 """Filter for getting affected instances.
6012 return (instance.disk_template in constants.DTS_INT_MIRROR and
6013 self.op.node_name in instance.all_nodes)
6015 def ExpandNames(self):
6016 if self.lock_all:
6017 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6018 else:
6019 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6021 # Since modifying a node can have severe effects on currently running
6022 # operations the resource lock is at least acquired in shared mode
6023 self.needed_locks[locking.LEVEL_NODE_RES] = \
6024 self.needed_locks[locking.LEVEL_NODE]
6026 # Get node resource and instance locks in shared mode; they are not used
6027 # for anything but read-only access
6028 self.share_locks[locking.LEVEL_NODE_RES] = 1
6029 self.share_locks[locking.LEVEL_INSTANCE] = 1
6031 if self.lock_instances:
6032 self.needed_locks[locking.LEVEL_INSTANCE] = \
6033 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6035 def BuildHooksEnv(self):
6038 This runs on the master node.
6042 "OP_TARGET": self.op.node_name,
6043 "MASTER_CANDIDATE": str(self.op.master_candidate),
6044 "OFFLINE": str(self.op.offline),
6045 "DRAINED": str(self.op.drained),
6046 "MASTER_CAPABLE": str(self.op.master_capable),
6047 "VM_CAPABLE": str(self.op.vm_capable),
6050 def BuildHooksNodes(self):
6051 """Build hooks nodes.
6054 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6057 def CheckPrereq(self):
6058 """Check prerequisites.
6060 This only checks the instance list against the existing names.
6063 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6065 if self.lock_instances:
6066 affected_instances = \
6067 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6069 # Verify instance locks
6070 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6071 wanted_instances = frozenset(affected_instances.keys())
6072 if wanted_instances - owned_instances:
6073 raise errors.OpPrereqError("Instances affected by changing node %s's"
6074 " secondary IP address have changed since"
6075 " locks were acquired, wanted '%s', have"
6076 " '%s'; retry the operation" %
6078 utils.CommaJoin(wanted_instances),
6079 utils.CommaJoin(owned_instances)),
6082 affected_instances = None
6084 if (self.op.master_candidate is not None or
6085 self.op.drained is not None or
6086 self.op.offline is not None):
6087 # we can't change the master's node flags
6088 if self.op.node_name == self.cfg.GetMasterNode():
6089 raise errors.OpPrereqError("The master role can be changed"
6090 " only via master-failover",
6093 if self.op.master_candidate and not node.master_capable:
6094 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6095 " it a master candidate" % node.name,
6098 if self.op.vm_capable is False:
6099 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6101 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6102 " the vm_capable flag" % node.name,
6105 if node.master_candidate and self.might_demote and not self.lock_all:
6106 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6107 # check if after removing the current node, we're missing master
6108 # candidates
6109 (mc_remaining, mc_should, _) = \
6110 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6111 if mc_remaining < mc_should:
6112 raise errors.OpPrereqError("Not enough master candidates, please"
6113 " pass auto promote option to allow"
6114 " promotion (--auto-promote or RAPI"
6115 " auto_promote=True)", errors.ECODE_STATE)
6117 self.old_flags = old_flags = (node.master_candidate,
6118 node.drained, node.offline)
6119 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6120 self.old_role = old_role = self._F2R[old_flags]
6122 # Check for ineffective changes
6123 for attr in self._FLAGS:
6124 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6125 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6126 setattr(self.op, attr, None)
6128 # Past this point, any flag change to False means a transition
6129 # away from the respective state, as only real changes are kept
6131 # TODO: We might query the real power state if it supports OOB
6132 if _SupportsOob(self.cfg, node):
6133 if self.op.offline is False and not (node.powered or
6134 self.op.powered is True):
6135 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6136 " offline status can be reset") %
6137 self.op.node_name, errors.ECODE_STATE)
6138 elif self.op.powered is not None:
6139 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6140 " as it does not support out-of-band"
6141 " handling") % self.op.node_name,
6144 # If we're being deofflined/drained, we'll MC ourself if needed
6145 if (self.op.drained is False or self.op.offline is False or
6146 (self.op.master_capable and not node.master_capable)):
6147 if _DecideSelfPromotion(self):
6148 self.op.master_candidate = True
6149 self.LogInfo("Auto-promoting node to master candidate")
6151 # If we're no longer master capable, we'll demote ourselves from MC
6152 if self.op.master_capable is False and node.master_candidate:
6153 self.LogInfo("Demoting from master candidate")
6154 self.op.master_candidate = False
6157 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6158 if self.op.master_candidate:
6159 new_role = self._ROLE_CANDIDATE
6160 elif self.op.drained:
6161 new_role = self._ROLE_DRAINED
6162 elif self.op.offline:
6163 new_role = self._ROLE_OFFLINE
6164 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6165 # False is still in new flags, which means we're un-setting (the
6166 # current) flags
6167 new_role = self._ROLE_REGULAR
6168 else: # no new flags, nothing, keep old role
6169 new_role = old_role
6171 self.new_role = new_role
6173 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6174 # Trying to transition out of offline status
6175 result = self.rpc.call_version([node.name])[node.name]
6177 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6178 " to report its version: %s" %
6179 (node.name, result.fail_msg),
6182 self.LogWarning("Transitioning node from offline to online state"
6183 " without using re-add. Please make sure the node"
6186 # When changing the secondary ip, verify if this is a single-homed to
6187 # multi-homed transition or vice versa, and apply the relevant
6188 # restrictions.
6189 if self.op.secondary_ip:
6190 # Ok even without locking, because this can't be changed by any LU
6191 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6192 master_singlehomed = master.secondary_ip == master.primary_ip
6193 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6194 if self.op.force and node.name == master.name:
6195 self.LogWarning("Transitioning from single-homed to multi-homed"
6196 " cluster. All nodes will require a secondary ip.")
6198 raise errors.OpPrereqError("Changing the secondary ip on a"
6199 " single-homed cluster requires the"
6200 " --force option to be passed, and the"
6201 " target node to be the master",
6203 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6204 if self.op.force and node.name == master.name:
6205 self.LogWarning("Transitioning from multi-homed to single-homed"
6206 " cluster. Secondary IPs will have to be removed.")
6208 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6209 " same as the primary IP on a multi-homed"
6210 " cluster, unless the --force option is"
6211 " passed, and the target node is the"
6212 " master", errors.ECODE_INVAL)
6214 assert not (frozenset(affected_instances) -
6215 self.owned_locks(locking.LEVEL_INSTANCE))
6218 if affected_instances:
6219 msg = ("Cannot change secondary IP address: offline node has"
6220 " instances (%s) configured to use it" %
6221 utils.CommaJoin(affected_instances.keys()))
6222 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6224 # On online nodes, check that no instances are running, and that
6225 # the node has the new ip and we can reach it.
6226 for instance in affected_instances.values():
6227 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6228 msg="cannot change secondary ip")
6230 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6231 if master.name != node.name:
6232 # check reachability from master secondary ip to new secondary ip
6233 if not netutils.TcpPing(self.op.secondary_ip,
6234 constants.DEFAULT_NODED_PORT,
6235 source=master.secondary_ip):
6236 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6237 " based ping to node daemon port",
6238 errors.ECODE_ENVIRON)
6240 if self.op.ndparams:
6241 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6242 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6243 self.new_ndparams = new_ndparams
6245 if self.op.hv_state:
6246 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6247 self.node.hv_state_static)
6249 if self.op.disk_state:
6250 self.new_disk_state = \
6251 _MergeAndVerifyDiskState(self.op.disk_state,
6252 self.node.disk_state_static)
6254 def Exec(self, feedback_fn):
6255 """Modifies a node.
6258 node = self.node
6259 old_role = self.old_role
6260 new_role = self.new_role
6262 result = []
6264 if self.op.ndparams:
6265 node.ndparams = self.new_ndparams
6267 if self.op.powered is not None:
6268 node.powered = self.op.powered
6270 if self.op.hv_state:
6271 node.hv_state_static = self.new_hv_state
6273 if self.op.disk_state:
6274 node.disk_state_static = self.new_disk_state
6276 for attr in ["master_capable", "vm_capable"]:
6277 val = getattr(self.op, attr)
6279 setattr(node, attr, val)
6280 result.append((attr, str(val)))
6282 if new_role != old_role:
6283 # Tell the node to demote itself, if no longer MC and not offline
6284 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6285 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6287 self.LogWarning("Node failed to demote itself: %s", msg)
6289 new_flags = self._R2F[new_role]
6290 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6292 result.append((desc, str(nf)))
6293 (node.master_candidate, node.drained, node.offline) = new_flags
6295 # we locked all nodes, so we adjust the candidate pool before updating this node
6296 if self.lock_all:
6297 _AdjustCandidatePool(self, [node.name])
6299 if self.op.secondary_ip:
6300 node.secondary_ip = self.op.secondary_ip
6301 result.append(("secondary_ip", self.op.secondary_ip))
6303 # this will trigger configuration file update, if needed
6304 self.cfg.Update(node, feedback_fn)
6306 # this will trigger job queue propagation or cleanup if the mc
6308 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6309 self.context.ReaddNode(node)
6311 return result
6314 class LUNodePowercycle(NoHooksLU):
6315 """Powercycles a node.
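Typically invoked through "gnt-node powercycle"; the reboot is scheduled
on the target node itself, which is why no locks are taken here.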
6320 def CheckArguments(self):
6321 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6322 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6323 raise errors.OpPrereqError("The node is the master and the force"
6324 " parameter was not set",
6327 def ExpandNames(self):
6328 """Locking for PowercycleNode.
6330 This is a last-resort option and shouldn't block on other
6331 jobs. Therefore, we grab no locks.
6334 self.needed_locks = {}
6336 def Exec(self, feedback_fn):
6340 result = self.rpc.call_node_powercycle(self.op.node_name,
6341 self.cfg.GetHypervisorType())
6342 result.Raise("Failed to schedule the reboot")
6343 return result.payload
6346 class LUClusterQuery(NoHooksLU):
6347 """Query cluster configuration.
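Typically used for "gnt-cluster info"-style output: returns a dictionary
with the software/protocol versions and the cluster-wide parameters.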
6352 def ExpandNames(self):
6353 self.needed_locks = {}
6355 def Exec(self, feedback_fn):
6356 """Return cluster config.
6359 cluster = self.cfg.GetClusterInfo()
6360 os_hvp = {}
6362 # Filter just for enabled hypervisors
6363 for os_name, hv_dict in cluster.os_hvp.items():
6364 os_hvp[os_name] = {}
6365 for hv_name, hv_params in hv_dict.items():
6366 if hv_name in cluster.enabled_hypervisors:
6367 os_hvp[os_name][hv_name] = hv_params
6369 # Convert ip_family to ip_version
6370 primary_ip_version = constants.IP4_VERSION
6371 if cluster.primary_ip_family == netutils.IP6Address.family:
6372 primary_ip_version = constants.IP6_VERSION
6374 result = {
6375 "software_version": constants.RELEASE_VERSION,
6376 "protocol_version": constants.PROTOCOL_VERSION,
6377 "config_version": constants.CONFIG_VERSION,
6378 "os_api_version": max(constants.OS_API_VERSIONS),
6379 "export_version": constants.EXPORT_VERSION,
6380 "architecture": runtime.GetArchInfo(),
6381 "name": cluster.cluster_name,
6382 "master": cluster.master_node,
6383 "default_hypervisor": cluster.primary_hypervisor,
6384 "enabled_hypervisors": cluster.enabled_hypervisors,
6385 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6386 for hypervisor_name in cluster.enabled_hypervisors]),
6388 "beparams": cluster.beparams,
6389 "osparams": cluster.osparams,
6390 "ipolicy": cluster.ipolicy,
6391 "nicparams": cluster.nicparams,
6392 "ndparams": cluster.ndparams,
6393 "diskparams": cluster.diskparams,
6394 "candidate_pool_size": cluster.candidate_pool_size,
6395 "master_netdev": cluster.master_netdev,
6396 "master_netmask": cluster.master_netmask,
6397 "use_external_mip_script": cluster.use_external_mip_script,
6398 "volume_group_name": cluster.volume_group_name,
6399 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6400 "file_storage_dir": cluster.file_storage_dir,
6401 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6402 "maintain_node_health": cluster.maintain_node_health,
6403 "ctime": cluster.ctime,
6404 "mtime": cluster.mtime,
6405 "uuid": cluster.uuid,
6406 "tags": list(cluster.GetTags()),
6407 "uid_pool": cluster.uid_pool,
6408 "default_iallocator": cluster.default_iallocator,
6409 "reserved_lvs": cluster.reserved_lvs,
6410 "primary_ip_version": primary_ip_version,
6411 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6412 "hidden_os": cluster.hidden_os,
6413 "blacklisted_os": cluster.blacklisted_os,
6414 }
6416 return result
6419 class LUClusterConfigQuery(NoHooksLU):
6420 """Return configuration values.
6425 def CheckArguments(self):
6426 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6428 def ExpandNames(self):
6429 self.cq.ExpandNames(self)
6431 def DeclareLocks(self, level):
6432 self.cq.DeclareLocks(self, level)
6434 def Exec(self, feedback_fn):
6435 result = self.cq.OldStyleQuery(self)
6437 assert len(result) == 1
6439 return result[0]
6442 class _ClusterQuery(_QueryBase):
6443 FIELDS = query.CLUSTER_FIELDS
6445 #: Do not sort (there is only one item)
6448 def ExpandNames(self, lu):
6449 lu.needed_locks = {}
6451 # The following variables interact with _QueryBase._GetNames
6452 self.wanted = locking.ALL_SET
6453 self.do_locking = self.use_locking
6455 if self.do_locking:
6456 raise errors.OpPrereqError("Cannot use locking for cluster queries",
6459 def DeclareLocks(self, lu, level):
6462 def _GetQueryData(self, lu):
6463 """Computes the list of nodes and their attributes.
6466 # Locking is not used
6467 assert not (compat.any(lu.glm.is_owned(level)
6468 for level in locking.LEVELS
6469 if level != locking.LEVEL_CLUSTER) or
6470 self.do_locking or self.use_locking)
6472 if query.CQ_CONFIG in self.requested_data:
6473 cluster = lu.cfg.GetClusterInfo()
6475 cluster = NotImplemented
6477 if query.CQ_QUEUE_DRAINED in self.requested_data:
6478 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6480 drain_flag = NotImplemented
6482 if query.CQ_WATCHER_PAUSE in self.requested_data:
6483 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6485 watcher_pause = NotImplemented
6487 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6490 class LUInstanceActivateDisks(NoHooksLU):
6491 """Bring up an instance's disks.
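Typically invoked through "gnt-instance activate-disks"; it assembles the
instance's block devices on the relevant nodes without starting the
instance itself.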
6496 def ExpandNames(self):
6497 self._ExpandAndLockInstance()
6498 self.needed_locks[locking.LEVEL_NODE] = []
6499 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6501 def DeclareLocks(self, level):
6502 if level == locking.LEVEL_NODE:
6503 self._LockInstancesNodes()
6505 def CheckPrereq(self):
6506 """Check prerequisites.
6508 This checks that the instance is in the cluster.
6511 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6512 assert self.instance is not None, \
6513 "Cannot retrieve locked instance %s" % self.op.instance_name
6514 _CheckNodeOnline(self, self.instance.primary_node)
6516 def Exec(self, feedback_fn):
6517 """Activate the disks.
6520 disks_ok, disks_info = \
6521 _AssembleInstanceDisks(self, self.instance,
6522 ignore_size=self.op.ignore_size)
6523 if not disks_ok:
6524 raise errors.OpExecError("Cannot activate block devices")
6526 if self.op.wait_for_sync:
6527 if not _WaitForSync(self, self.instance):
6528 raise errors.OpExecError("Some disks of the instance are degraded!")
6530 return disks_info
6533 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6534 ignore_size=False):
6535 """Prepare the block devices for an instance.
6537 This sets up the block devices on all nodes.
6539 @type lu: L{LogicalUnit}
6540 @param lu: the logical unit on whose behalf we execute
6541 @type instance: L{objects.Instance}
6542 @param instance: the instance for whose disks we assemble
6543 @type disks: list of L{objects.Disk} or None
6544 @param disks: which disks to assemble (or all, if None)
6545 @type ignore_secondaries: boolean
6546 @param ignore_secondaries: if true, errors on secondary nodes
6547 won't result in an error return from the function
6548 @type ignore_size: boolean
6549 @param ignore_size: if true, the current known size of the disk
6550 will not be used during the disk activation, useful for cases
6551 when the size is wrong
6552 @return: False if the operation failed, otherwise a list of
6553 (host, instance_visible_name, node_visible_name)
6554 with the mapping from node devices to instance devices
6559 iname = instance.name
6560 disks = _ExpandCheckDisks(instance, disks)
6562 # With the two-pass mechanism we try to reduce the window of
6563 # opportunity for the race condition of switching DRBD to primary
6564 # before handshaking occurred, but we do not eliminate it
6566 # The proper fix would be to wait (with some limits) until the
6567 # connection has been made and drbd transitions from WFConnection
6568 # into any other network-connected state (Connected, SyncTarget,
6571 # 1st pass, assemble on all nodes in secondary mode
6572 for idx, inst_disk in enumerate(disks):
6573 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6574 if ignore_size:
6575 node_disk = node_disk.Copy()
6576 node_disk.UnsetSize()
6577 lu.cfg.SetDiskID(node_disk, node)
6578 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6580 msg = result.fail_msg
6581 if msg:
6582 is_offline_secondary = (node in instance.secondary_nodes and
6583 result.offline)
6584 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6585 " (is_primary=False, pass=1): %s",
6586 inst_disk.iv_name, node, msg)
6587 if not (ignore_secondaries or is_offline_secondary):
6590 # FIXME: race condition on drbd migration to primary
6592 # 2nd pass, do only the primary node
6593 for idx, inst_disk in enumerate(disks):
6596 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6597 if node != instance.primary_node:
6598 continue
6599 if ignore_size:
6600 node_disk = node_disk.Copy()
6601 node_disk.UnsetSize()
6602 lu.cfg.SetDiskID(node_disk, node)
6603 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6605 msg = result.fail_msg
6607 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6608 " (is_primary=True, pass=2): %s",
6609 inst_disk.iv_name, node, msg)
6612 dev_path = result.payload
6614 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6616 # leave the disks configured for the primary node
6617 # this is a workaround that would be fixed better by
6618 # improving the logical/physical id handling
6619 for disk in disks:
6620 lu.cfg.SetDiskID(disk, instance.primary_node)
6622 return disks_ok, device_info
6625 def _StartInstanceDisks(lu, instance, force):
6626 """Start the disks of an instance.
6629 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6630 ignore_secondaries=force)
6631 if not disks_ok:
6632 _ShutdownInstanceDisks(lu, instance)
6633 if force is not None and not force:
6634 lu.proc.LogWarning("", hint="If the message above refers to a"
6635 " secondary node,"
6636 " you can retry the operation using '--force'.")
6637 raise errors.OpExecError("Disk consistency error")
6640 class LUInstanceDeactivateDisks(NoHooksLU):
6641 """Shutdown an instance's disks.
6646 def ExpandNames(self):
6647 self._ExpandAndLockInstance()
6648 self.needed_locks[locking.LEVEL_NODE] = []
6649 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6651 def DeclareLocks(self, level):
6652 if level == locking.LEVEL_NODE:
6653 self._LockInstancesNodes()
6655 def CheckPrereq(self):
6656 """Check prerequisites.
6658 This checks that the instance is in the cluster.
6661 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6662 assert self.instance is not None, \
6663 "Cannot retrieve locked instance %s" % self.op.instance_name
6665 def Exec(self, feedback_fn):
6666 """Deactivate the disks
6669 instance = self.instance
6670 if self.op.force:
6671 _ShutdownInstanceDisks(self, instance)
6672 else:
6673 _SafeShutdownInstanceDisks(self, instance)
6676 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6677 """Shutdown block devices of an instance.
6679 This function checks if an instance is running, before calling
6680 _ShutdownInstanceDisks.
6683 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6684 _ShutdownInstanceDisks(lu, instance, disks=disks)
6687 def _ExpandCheckDisks(instance, disks):
6688 """Return the instance disks selected by the disks list
6690 @type disks: list of L{objects.Disk} or None
6691 @param disks: selected disks
6692 @rtype: list of L{objects.Disk}
6693 @return: selected instance disks to act on
6696 if disks is None:
6697 return instance.disks
6699 if not set(disks).issubset(instance.disks):
6700 raise errors.ProgrammerError("Can only act on disks belonging to the"
6705 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6706 """Shutdown block devices of an instance.
6708 This does the shutdown on all nodes of the instance.
6710 If ignore_primary is false, errors on the primary node are
6711 ignored.
6715 disks = _ExpandCheckDisks(instance, disks)
6718 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6719 lu.cfg.SetDiskID(top_disk, node)
6720 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6721 msg = result.fail_msg
6723 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6724 disk.iv_name, node, msg)
6725 if ((node == instance.primary_node and not ignore_primary) or
6726 (node != instance.primary_node and not result.offline)):
6731 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6732 """Checks if a node has enough free memory.
6734 This function checks if a given node has the needed amount of free
6735 memory. In case the node has less memory or we cannot get the
6736 information from the node, this function raises an OpPrereqError
6737 exception.
6739 @type lu: C{LogicalUnit}
6740 @param lu: a logical unit from which we get configuration data
6742 @param node: the node to check
6743 @type reason: C{str}
6744 @param reason: string to use in the error message
6745 @type requested: C{int}
6746 @param requested: the amount of memory in MiB to check for
6747 @type hypervisor_name: C{str}
6748 @param hypervisor_name: the hypervisor to ask for memory stats
6750 @return: node current free memory
6751 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6752 we cannot check the node
6755 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6756 nodeinfo[node].Raise("Can't get data from node %s" % node,
6757 prereq=True, ecode=errors.ECODE_ENVIRON)
6758 (_, _, (hv_info, )) = nodeinfo[node].payload
6760 free_mem = hv_info.get("memory_free", None)
6761 if not isinstance(free_mem, int):
6762 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6763 " was '%s'" % (node, free_mem),
6764 errors.ECODE_ENVIRON)
6765 if requested > free_mem:
6766 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6767 " needed %s MiB, available %s MiB" %
6768 (node, reason, requested, free_mem),
6773 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6774 """Checks if nodes have enough free disk space in the all VGs.
6776 This function checks if all given nodes have the needed amount of
6777 free disk. In case any node has less disk or we cannot get the
6778 information from the node, this function raises an OpPrereqError
6779 exception.
6781 @type lu: C{LogicalUnit}
6782 @param lu: a logical unit from which we get configuration data
6783 @type nodenames: C{list}
6784 @param nodenames: the list of node names to check
6785 @type req_sizes: C{dict}
6786 @param req_sizes: the hash of vg and corresponding amount of disk in
6788 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6789 or we cannot check the node
6792 for vg, req_size in req_sizes.items():
6793 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6796 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6797 """Checks if nodes have enough free disk space in the specified VG.
6799 This function checks if all given nodes have the needed amount of
6800 free disk. In case any node has less disk or we cannot get the
6801 information from the node, this function raises an OpPrereqError
6802 exception.
6804 @type lu: C{LogicalUnit}
6805 @param lu: a logical unit from which we get configuration data
6806 @type nodenames: C{list}
6807 @param nodenames: the list of node names to check
6809 @param vg: the volume group to check
6810 @type requested: C{int}
6811 @param requested: the amount of disk in MiB to check for
6812 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6813 or we cannot check the node
6816 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6817 for node in nodenames:
6818 info = nodeinfo[node]
6819 info.Raise("Cannot get current information from node %s" % node,
6820 prereq=True, ecode=errors.ECODE_ENVIRON)
6821 (_, (vg_info, ), _) = info.payload
6822 vg_free = vg_info.get("vg_free", None)
6823 if not isinstance(vg_free, int):
6824 raise errors.OpPrereqError("Can't compute free disk space on node"
6825 " %s for vg %s, result was '%s'" %
6826 (node, vg, vg_free), errors.ECODE_ENVIRON)
6827 if requested > vg_free:
6828 raise errors.OpPrereqError("Not enough disk space on target node %s"
6829 " vg %s: required %d MiB, available %d MiB" %
6830 (node, vg, requested, vg_free),
6834 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6835 """Checks if nodes have enough physical CPUs
6837 This function checks if all given nodes have the needed number of
6838 physical CPUs. In case any node has less CPUs or we cannot get the
6839 information from the node, this function raises an OpPrereqError
6842 @type lu: C{LogicalUnit}
6843 @param lu: a logical unit from which we get configuration data
6844 @type nodenames: C{list}
6845 @param nodenames: the list of node names to check
6846 @type requested: C{int}
6847 @param requested: the minimum acceptable number of physical CPUs
6848 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6849 or we cannot check the node
6852 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6853 for node in nodenames:
6854 info = nodeinfo[node]
6855 info.Raise("Cannot get current information from node %s" % node,
6856 prereq=True, ecode=errors.ECODE_ENVIRON)
6857 (_, _, (hv_info, )) = info.payload
6858 num_cpus = hv_info.get("cpu_total", None)
6859 if not isinstance(num_cpus, int):
6860 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6861 " on node %s, result was '%s'" %
6862 (node, num_cpus), errors.ECODE_ENVIRON)
6863 if requested > num_cpus:
6864 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6865 "required" % (node, num_cpus, requested),
6869 class LUInstanceStartup(LogicalUnit):
6870 """Starts an instance.
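Typically invoked through "gnt-instance startup"; hvparams/beparams given
in the opcode are passed to the hypervisor as temporary overrides for this
start only.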
6873 HPATH = "instance-start"
6874 HTYPE = constants.HTYPE_INSTANCE
6877 def CheckArguments(self):
6879 if self.op.beparams:
6880 # fill the beparams dict
6881 objects.UpgradeBeParams(self.op.beparams)
6882 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6884 def ExpandNames(self):
6885 self._ExpandAndLockInstance()
6886 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6888 def DeclareLocks(self, level):
6889 if level == locking.LEVEL_NODE_RES:
6890 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6892 def BuildHooksEnv(self):
6895 This runs on master, primary and secondary nodes of the instance.
6899 "FORCE": self.op.force,
6902 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6906 def BuildHooksNodes(self):
6907 """Build hooks nodes.
6910 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6913 def CheckPrereq(self):
6914 """Check prerequisites.
6916 This checks that the instance is in the cluster.
6919 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6920 assert self.instance is not None, \
6921 "Cannot retrieve locked instance %s" % self.op.instance_name
6924 if self.op.hvparams:
6925 # check hypervisor parameter syntax (locally)
6926 cluster = self.cfg.GetClusterInfo()
6927 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6928 filled_hvp = cluster.FillHV(instance)
6929 filled_hvp.update(self.op.hvparams)
6930 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6931 hv_type.CheckParameterSyntax(filled_hvp)
6932 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6934 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6936 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6938 if self.primary_offline and self.op.ignore_offline_nodes:
6939 self.proc.LogWarning("Ignoring offline primary node")
6941 if self.op.hvparams or self.op.beparams:
6942 self.proc.LogWarning("Overridden parameters are ignored")
6943 else:
6944 _CheckNodeOnline(self, instance.primary_node)
6946 bep = self.cfg.GetClusterInfo().FillBE(instance)
6947 bep.update(self.op.beparams)
6949 # check bridges existence
6950 _CheckInstanceBridgesExist(self, instance)
6952 remote_info = self.rpc.call_instance_info(instance.primary_node,
6953 instance.name,
6954 instance.hypervisor)
6955 remote_info.Raise("Error checking node %s" % instance.primary_node,
6956 prereq=True, ecode=errors.ECODE_ENVIRON)
6957 if not remote_info.payload: # not running already
6958 _CheckNodeFreeMemory(self, instance.primary_node,
6959 "starting instance %s" % instance.name,
6960 bep[constants.BE_MINMEM], instance.hypervisor)
6962 def Exec(self, feedback_fn):
6963 """Start the instance.
6966 instance = self.instance
6967 force = self.op.force
6969 if not self.op.no_remember:
6970 self.cfg.MarkInstanceUp(instance.name)
6972 if self.primary_offline:
6973 assert self.op.ignore_offline_nodes
6974 self.proc.LogInfo("Primary node offline, marked instance as started")
6975 else:
6976 node_current = instance.primary_node
6978 _StartInstanceDisks(self, instance, force)
6981 self.rpc.call_instance_start(node_current,
6982 (instance, self.op.hvparams,
6984 self.op.startup_paused)
6985 msg = result.fail_msg
6987 _ShutdownInstanceDisks(self, instance)
6988 raise errors.OpExecError("Could not start instance: %s" % msg)
6991 class LUInstanceReboot(LogicalUnit):
6992 """Reboot an instance.
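Soft and hard reboots are delegated to the node daemon on the primary
node; any other reboot type is implemented as a full shutdown followed by
a fresh start (see Exec below).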
6995 HPATH = "instance-reboot"
6996 HTYPE = constants.HTYPE_INSTANCE
6999 def ExpandNames(self):
7000 self._ExpandAndLockInstance()
7002 def BuildHooksEnv(self):
7005 This runs on master, primary and secondary nodes of the instance.
7009 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7010 "REBOOT_TYPE": self.op.reboot_type,
7011 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7014 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7018 def BuildHooksNodes(self):
7019 """Build hooks nodes.
7022 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7025 def CheckPrereq(self):
7026 """Check prerequisites.
7028 This checks that the instance is in the cluster.
7031 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7032 assert self.instance is not None, \
7033 "Cannot retrieve locked instance %s" % self.op.instance_name
7034 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7035 _CheckNodeOnline(self, instance.primary_node)
7037 # check bridges existence
7038 _CheckInstanceBridgesExist(self, instance)
7040 def Exec(self, feedback_fn):
7041 """Reboot the instance.
7044 instance = self.instance
7045 ignore_secondaries = self.op.ignore_secondaries
7046 reboot_type = self.op.reboot_type
7048 remote_info = self.rpc.call_instance_info(instance.primary_node,
7049 instance.name,
7050 instance.hypervisor)
7051 remote_info.Raise("Error checking node %s" % instance.primary_node)
7052 instance_running = bool(remote_info.payload)
7054 node_current = instance.primary_node
7056 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7057 constants.INSTANCE_REBOOT_HARD]:
7058 for disk in instance.disks:
7059 self.cfg.SetDiskID(disk, node_current)
7060 result = self.rpc.call_instance_reboot(node_current, instance,
7061 reboot_type,
7062 self.op.shutdown_timeout)
7063 result.Raise("Could not reboot instance")
7064 else:
7065 if instance_running:
7066 result = self.rpc.call_instance_shutdown(node_current, instance,
7067 self.op.shutdown_timeout)
7068 result.Raise("Could not shutdown instance for full reboot")
7069 _ShutdownInstanceDisks(self, instance)
7070 else:
7071 self.LogInfo("Instance %s was already stopped, starting now",
7072 instance.name)
7073 _StartInstanceDisks(self, instance, ignore_secondaries)
7074 result = self.rpc.call_instance_start(node_current,
7075 (instance, None, None), False)
7076 msg = result.fail_msg
7078 _ShutdownInstanceDisks(self, instance)
7079 raise errors.OpExecError("Could not start instance for"
7080 " full reboot: %s" % msg)
7082 self.cfg.MarkInstanceUp(instance.name)
7085 class LUInstanceShutdown(LogicalUnit):
7086 """Shutdown an instance.
7089 HPATH = "instance-stop"
7090 HTYPE = constants.HTYPE_INSTANCE
7093 def ExpandNames(self):
7094 self._ExpandAndLockInstance()
7096 def BuildHooksEnv(self):
7099 This runs on master, primary and secondary nodes of the instance.
7102 env = _BuildInstanceHookEnvByObject(self, self.instance)
7103 env["TIMEOUT"] = self.op.timeout
7106 def BuildHooksNodes(self):
7107 """Build hooks nodes.
7110 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7113 def CheckPrereq(self):
7114 """Check prerequisites.
7116 This checks that the instance is in the cluster.
7119 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7120 assert self.instance is not None, \
7121 "Cannot retrieve locked instance %s" % self.op.instance_name
7123 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7125 self.primary_offline = \
7126 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7128 if self.primary_offline and self.op.ignore_offline_nodes:
7129 self.proc.LogWarning("Ignoring offline primary node")
7131 _CheckNodeOnline(self, self.instance.primary_node)
7133 def Exec(self, feedback_fn):
7134 """Shutdown the instance.
7137 instance = self.instance
7138 node_current = instance.primary_node
7139 timeout = self.op.timeout
7141 if not self.op.no_remember:
7142 self.cfg.MarkInstanceDown(instance.name)
7144 if self.primary_offline:
7145 assert self.op.ignore_offline_nodes
7146 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7147 else:
7148 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7149 msg = result.fail_msg
7151 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7153 _ShutdownInstanceDisks(self, instance)
7156 class LUInstanceReinstall(LogicalUnit):
7157 """Reinstall an instance.
7160 HPATH = "instance-reinstall"
7161 HTYPE = constants.HTYPE_INSTANCE
7164 def ExpandNames(self):
7165 self._ExpandAndLockInstance()
7167 def BuildHooksEnv(self):
7170 This runs on master, primary and secondary nodes of the instance.
7173 return _BuildInstanceHookEnvByObject(self, self.instance)
7175 def BuildHooksNodes(self):
7176 """Build hooks nodes.
7179 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7182 def CheckPrereq(self):
7183 """Check prerequisites.
7185 This checks that the instance is in the cluster and is not running.
7188 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7189 assert instance is not None, \
7190 "Cannot retrieve locked instance %s" % self.op.instance_name
7191 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7192 " offline, cannot reinstall")
7194 if instance.disk_template == constants.DT_DISKLESS:
7195 raise errors.OpPrereqError("Instance '%s' has no disks" %
7196 self.op.instance_name,
7198 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7200 if self.op.os_type is not None:
7202 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7203 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7204 instance_os = self.op.os_type
7205 else:
7206 instance_os = instance.os
7208 nodelist = list(instance.all_nodes)
7210 if self.op.osparams:
7211 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7212 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7213 self.os_inst = i_osdict # the new dict (without defaults)
7214 else:
7215 self.os_inst = {}
7217 self.instance = instance
7219 def Exec(self, feedback_fn):
7220 """Reinstall the instance.
7223 inst = self.instance
7225 if self.op.os_type is not None:
7226 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7227 inst.os = self.op.os_type
7228 # Write to configuration
7229 self.cfg.Update(inst, feedback_fn)
7231 _StartInstanceDisks(self, inst, None)
7232 try:
7233 feedback_fn("Running the instance OS create scripts...")
7234 # FIXME: pass debug option from opcode to backend
7235 result = self.rpc.call_instance_os_add(inst.primary_node,
7236 (inst, self.os_inst), True,
7237 self.op.debug_level)
7238 result.Raise("Could not install OS for instance %s on node %s" %
7239 (inst.name, inst.primary_node))
7240 finally:
7241 _ShutdownInstanceDisks(self, inst)
7244 class LUInstanceRecreateDisks(LogicalUnit):
7245 """Recreate an instance's missing disks.
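Typically invoked through "gnt-instance recreate-disks"; optionally the
disks can be recreated on a new set of nodes, chosen either explicitly or
through an instance allocator.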
7248 HPATH = "instance-recreate-disks"
7249 HTYPE = constants.HTYPE_INSTANCE
7252 _MODIFYABLE = frozenset([
7253 constants.IDISK_SIZE,
7254 constants.IDISK_MODE,
7255 ])
7257 # New or changed disk parameters may have different semantics
7258 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7259 constants.IDISK_ADOPT,
7261 # TODO: Implement support for changing the VG while recreating
7262 constants.IDISK_VG,
7263 constants.IDISK_METAVG,
7264 ]))
7266 def _RunAllocator(self):
7267 """Run the allocator based on input opcode.
7270 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7273 # The allocator should actually run in "relocate" mode, but current
7274 # allocators don't support relocating all the nodes of an instance at
7275 # the same time. As a workaround we use "allocate" mode, but this is
7276 # suboptimal for two reasons:
7277 # - The instance name passed to the allocator is present in the list of
7278 # existing instances, so there could be a conflict within the
7279 # internal structures of the allocator. This doesn't happen with the
7280 # current allocators, but it's a liability.
7281 # - The allocator counts the resources used by the instance twice: once
7282 # because the instance exists already, and once because it tries to
7283 # allocate a new instance.
7284 # The allocator could choose some of the nodes on which the instance is
7285 # running, but that's not a problem. If the instance nodes are broken,
7286 # they should already be marked as drained or offline, and hence
7287 # skipped by the allocator. If instance disks have been lost for other
7288 # reasons, then recreating the disks on the same nodes should be fine.
7289 disk_template = self.instance.disk_template
7290 spindle_use = be_full[constants.BE_SPINDLE_USE]
7291 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7292 disk_template=disk_template,
7293 tags=list(self.instance.GetTags()),
7294 os=self.instance.os,
7296 vcpus=be_full[constants.BE_VCPUS],
7297 memory=be_full[constants.BE_MAXMEM],
7298 spindle_use=spindle_use,
7299 disks=[{constants.IDISK_SIZE: d.size,
7300 constants.IDISK_MODE: d.mode}
7301 for d in self.instance.disks],
7302 hypervisor=self.instance.hypervisor)
7303 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7305 ial.Run(self.op.iallocator)
7307 assert req.RequiredNodes() == len(self.instance.all_nodes)
7309 if not ial.success:
7310 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7311 " %s" % (self.op.iallocator, ial.info),
7314 self.op.nodes = ial.result
7315 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7316 self.op.instance_name, self.op.iallocator,
7317 utils.CommaJoin(ial.result))
7319 def CheckArguments(self):
7320 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7321 # Normalize and convert deprecated list of disk indices
7322 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7324 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7325 if duplicates:
7326 raise errors.OpPrereqError("Some disks have been specified more than"
7327 " once: %s" % utils.CommaJoin(duplicates),
7330 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7331 # when neither iallocator nor nodes are specified
7332 if self.op.iallocator or self.op.nodes:
7333 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7335 for (idx, params) in self.op.disks:
7336 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7337 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7338 if unsupported:
7339 raise errors.OpPrereqError("Parameters for disk %s try to change"
7340 " unmodifiable parameter(s): %s" %
7341 (idx, utils.CommaJoin(unsupported)),
7344 def ExpandNames(self):
7345 self._ExpandAndLockInstance()
7346 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7347 if self.op.nodes:
7348 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7349 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7350 else:
7351 self.needed_locks[locking.LEVEL_NODE] = []
7352 if self.op.iallocator:
7353 # iallocator will select a new node in the same group
7354 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7355 self.needed_locks[locking.LEVEL_NODE_RES] = []
7357 def DeclareLocks(self, level):
7358 if level == locking.LEVEL_NODEGROUP:
7359 assert self.op.iallocator is not None
7360 assert not self.op.nodes
7361 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7362 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7363 # Lock the primary group used by the instance optimistically; this
7364 # requires going via the node before it's locked, requiring
7365 # verification later on
7366 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7367 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7369 elif level == locking.LEVEL_NODE:
7370 # If an allocator is used, then we lock all the nodes in the current
7371 # instance group, as we don't know yet which ones will be selected;
7372 # if we replace the nodes without using an allocator, locks are
7373 # already declared in ExpandNames; otherwise, we need to lock all the
7374 # instance nodes for disk re-creation
7375 if self.op.iallocator:
7376 assert not self.op.nodes
7377 assert not self.needed_locks[locking.LEVEL_NODE]
7378 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7380 # Lock member nodes of the group of the primary node
7381 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7382 self.needed_locks[locking.LEVEL_NODE].extend(
7383 self.cfg.GetNodeGroup(group_uuid).members)
7384 elif not self.op.nodes:
7385 self._LockInstancesNodes(primary_only=False)
7386 elif level == locking.LEVEL_NODE_RES:
7388 self.needed_locks[locking.LEVEL_NODE_RES] = \
7389 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7391 def BuildHooksEnv(self):
7394 This runs on master, primary and secondary nodes of the instance.
7397 return _BuildInstanceHookEnvByObject(self, self.instance)
7399 def BuildHooksNodes(self):
7400 """Build hooks nodes.
7403 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7406 def CheckPrereq(self):
7407 """Check prerequisites.
7409 This checks that the instance is in the cluster and is not running.
7412 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7413 assert instance is not None, \
7414 "Cannot retrieve locked instance %s" % self.op.instance_name
7415 if self.op.nodes:
7416 if len(self.op.nodes) != len(instance.all_nodes):
7417 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7418 " %d replacement nodes were specified" %
7419 (instance.name, len(instance.all_nodes),
7420 len(self.op.nodes)),
7422 assert instance.disk_template != constants.DT_DRBD8 or \
7423 len(self.op.nodes) == 2
7424 assert instance.disk_template != constants.DT_PLAIN or \
7425 len(self.op.nodes) == 1
7426 primary_node = self.op.nodes[0]
7427 else:
7428 primary_node = instance.primary_node
7429 if not self.op.iallocator:
7430 _CheckNodeOnline(self, primary_node)
7432 if instance.disk_template == constants.DT_DISKLESS:
7433 raise errors.OpPrereqError("Instance '%s' has no disks" %
7434 self.op.instance_name, errors.ECODE_INVAL)
7436 # Verify if node group locks are still correct
7437 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7438 if owned_groups:
7439 # Node group locks are acquired only for the primary node (and only
7440 # when the allocator is used)
7441 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7442 primary_only=True)
7444 # if we replace nodes *and* the old primary is offline, we don't
7445 # check the instance state
7446 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7447 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7448 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7449 msg="cannot recreate disks")
7451 if self.op.disks:
7452 self.disks = dict(self.op.disks)
7453 else:
7454 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7456 maxidx = max(self.disks.keys())
7457 if maxidx >= len(instance.disks):
7458 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7461 if ((self.op.nodes or self.op.iallocator) and
7462 sorted(self.disks.keys()) != range(len(instance.disks))):
7463 raise errors.OpPrereqError("Can't recreate disks partially and"
7464 " change the nodes at the same time",
7467 self.instance = instance
7469 if self.op.iallocator:
7470 self._RunAllocator()
7471 # Release unneeded node and node resource locks
7472 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7473 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7475 def Exec(self, feedback_fn):
7476 """Recreate the disks.
7479 instance = self.instance
7481 assert (self.owned_locks(locking.LEVEL_NODE) ==
7482 self.owned_locks(locking.LEVEL_NODE_RES))
7485 mods = [] # keeps track of needed changes
7487 for idx, disk in enumerate(instance.disks):
7489 changes = self.disks[idx]
7491 # Disk should not be recreated
7495 # update secondaries for disks, if needed
7496 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7497 # need to update the nodes and minors
7498 assert len(self.op.nodes) == 2
7499 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
7501 (_, _, old_port, _, _, old_secret) = disk.logical_id
7502 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7503 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7504 new_minors[0], new_minors[1], old_secret)
7505 assert len(disk.logical_id) == len(new_id)
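# Added note: for LD_DRBD8 devices the logical_id is, as the code above
# assumes, a 6-tuple of (primary_node, secondary_node, port, primary_minor,
# secondary_minor, shared_secret); only the node names and the minors are
# replaced here, while the TCP port and the shared secret are carried over
# from the old id.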
7509 mods.append((idx, new_id, changes))
7511 # now that we have passed all asserts above, we can apply the mods
7512 # in a single run (to avoid partial changes)
7513 for idx, new_id, changes in mods:
7514 disk = instance.disks[idx]
7515 if new_id is not None:
7516 assert disk.dev_type == constants.LD_DRBD8
7517 disk.logical_id = new_id
7519 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7520 mode=changes.get(constants.IDISK_MODE, None))
7522 # change primary node, if needed
7524 instance.primary_node = self.op.nodes[0]
7525 self.LogWarning("Changing the instance's nodes, you will have to"
7526 " remove any disks left on the older nodes manually")
7529 self.cfg.Update(instance, feedback_fn)
7531 # All touched nodes must be locked
7532 mylocks = self.owned_locks(locking.LEVEL_NODE)
7533 assert mylocks.issuperset(frozenset(instance.all_nodes))
7534 _CreateDisks(self, instance, to_skip=to_skip)
7537 class LUInstanceRename(LogicalUnit):
7538 """Rename an instance.
7541 HPATH = "instance-rename"
7542 HTYPE = constants.HTYPE_INSTANCE
7544 def CheckArguments(self):
7548 if self.op.ip_check and not self.op.name_check:
7549 # TODO: make the ip check more flexible and not depend on the name check
7550 raise errors.OpPrereqError("IP address check requires a name check",
7553 def BuildHooksEnv(self):
7556 This runs on master, primary and secondary nodes of the instance.
7559 env = _BuildInstanceHookEnvByObject(self, self.instance)
7560 env["INSTANCE_NEW_NAME"] = self.op.new_name
7563 def BuildHooksNodes(self):
7564 """Build hooks nodes.
7567 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7570 def CheckPrereq(self):
7571 """Check prerequisites.
7573 This checks that the instance is in the cluster and is not running.
7576 self.op.instance_name = _ExpandInstanceName(self.cfg,
7577 self.op.instance_name)
7578 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7579 assert instance is not None
7580 _CheckNodeOnline(self, instance.primary_node)
7581 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7582 msg="cannot rename")
7583 self.instance = instance
7585 new_name = self.op.new_name
7586 if self.op.name_check:
7587 hostname = _CheckHostnameSane(self, new_name)
7588 new_name = self.op.new_name = hostname.name
7589 if (self.op.ip_check and
7590 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7591 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7592 (hostname.ip, new_name),
7593 errors.ECODE_NOTUNIQUE)
7595 instance_list = self.cfg.GetInstanceList()
7596 if new_name in instance_list and new_name != instance.name:
7597 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7598 new_name, errors.ECODE_EXISTS)
7600 def Exec(self, feedback_fn):
7601 """Rename the instance.
7604 inst = self.instance
7605 old_name = inst.name
7607 rename_file_storage = False
7608 if (inst.disk_template in constants.DTS_FILEBASED and
7609 self.op.new_name != inst.name):
7610 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7611 rename_file_storage = True
7613 self.cfg.RenameInstance(inst.name, self.op.new_name)
7614 # Change the instance lock. This is definitely safe while we hold the BGL.
7615 # Otherwise the new lock would have to be added in acquired mode.
7617 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7618 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7620 # re-read the instance from the configuration after rename
7621 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7623 if rename_file_storage:
7624 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7625 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7626 old_file_storage_dir,
7627 new_file_storage_dir)
7628 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7629 " (but the instance has been renamed in Ganeti)" %
7630 (inst.primary_node, old_file_storage_dir,
7631 new_file_storage_dir))
7633 _StartInstanceDisks(self, inst, None)
7634 # update info on disks
7635 info = _GetInstanceInfoText(inst)
7636 for (idx, disk) in enumerate(inst.disks):
7637 for node in inst.all_nodes:
7638 self.cfg.SetDiskID(disk, node)
7639 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7641 self.LogWarning("Error setting info on node %s for disk %s: %s",
7642 node, idx, result.fail_msg)
7644 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7645 old_name, self.op.debug_level)
7646 msg = result.fail_msg
7648 msg = ("Could not run OS rename script for instance %s on node %s"
7649 " (but the instance has been renamed in Ganeti): %s" %
7650 (inst.name, inst.primary_node, msg))
7651 self.proc.LogWarning(msg)
7653 _ShutdownInstanceDisks(self, inst)
7658 class LUInstanceRemove(LogicalUnit):
7659 """Remove an instance.
7662 HPATH = "instance-remove"
7663 HTYPE = constants.HTYPE_INSTANCE
7666 def ExpandNames(self):
7667 self._ExpandAndLockInstance()
7668 self.needed_locks[locking.LEVEL_NODE] = []
7669 self.needed_locks[locking.LEVEL_NODE_RES] = []
7670 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7672 def DeclareLocks(self, level):
7673 if level == locking.LEVEL_NODE:
7674 self._LockInstancesNodes()
7675 elif level == locking.LEVEL_NODE_RES:
7677 self.needed_locks[locking.LEVEL_NODE_RES] = \
7678 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7680 def BuildHooksEnv(self):
7683 This runs on master, primary and secondary nodes of the instance.
7686 env = _BuildInstanceHookEnvByObject(self, self.instance)
7687 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7690 def BuildHooksNodes(self):
7691 """Build hooks nodes.
7694 nl = [self.cfg.GetMasterNode()]
7695 nl_post = list(self.instance.all_nodes) + nl
7696 return (nl, nl_post)
7698 def CheckPrereq(self):
7699 """Check prerequisites.
7701 This checks that the instance is in the cluster.
7704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7705 assert self.instance is not None, \
7706 "Cannot retrieve locked instance %s" % self.op.instance_name
7708 def Exec(self, feedback_fn):
7709 """Remove the instance.
7712 instance = self.instance
7713 logging.info("Shutting down instance %s on node %s",
7714 instance.name, instance.primary_node)
7716 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7717 self.op.shutdown_timeout)
7718 msg = result.fail_msg
7720 if self.op.ignore_failures:
7721 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7723 raise errors.OpExecError("Could not shutdown instance %s on"
7725 (instance.name, instance.primary_node, msg))
7727 assert (self.owned_locks(locking.LEVEL_NODE) ==
7728 self.owned_locks(locking.LEVEL_NODE_RES))
7729 assert not (set(instance.all_nodes) -
7730 self.owned_locks(locking.LEVEL_NODE)), \
7731 "Not owning correct locks"
7733 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7736 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7737 """Utility function to remove an instance.
7740 logging.info("Removing block devices for instance %s", instance.name)
7742 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7743 if not ignore_failures:
7744 raise errors.OpExecError("Can't remove instance's disks")
7745 feedback_fn("Warning: can't remove instance's disks")
7747 logging.info("Removing instance %s out of cluster config", instance.name)
7749 lu.cfg.RemoveInstance(instance.name)
7751 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7752 "Instance lock removal conflict"
7754 # Remove lock for the instance
7755 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7758 class LUInstanceQuery(NoHooksLU):
7759 """Logical unit for querying instances.
7762 # pylint: disable=W0142
7765 def CheckArguments(self):
7766 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7767 self.op.output_fields, self.op.use_locking)
7769 def ExpandNames(self):
7770 self.iq.ExpandNames(self)
7772 def DeclareLocks(self, level):
7773 self.iq.DeclareLocks(self, level)
7775 def Exec(self, feedback_fn):
7776 return self.iq.OldStyleQuery(self)
7779 def _ExpandNamesForMigration(lu):
7780 """Expands names for use with L{TLMigrateInstance}.
7782 @type lu: L{LogicalUnit}
7785 if lu.op.target_node is not None:
7786 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7788 lu.needed_locks[locking.LEVEL_NODE] = []
7789 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7791 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7792 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7795 def _DeclareLocksForMigration(lu, level):
7796 """Declares locks for L{TLMigrateInstance}.
7798 @type lu: L{LogicalUnit}
7799 @param level: Lock level
7802 if level == locking.LEVEL_NODE:
7803 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7804 if instance.disk_template in constants.DTS_EXT_MIRROR:
7805 if lu.op.target_node is None:
7806 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7808 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7810 del lu.recalculate_locks[locking.LEVEL_NODE]
7812 lu._LockInstancesNodes() # pylint: disable=W0212
7813 elif level == locking.LEVEL_NODE_RES:
7815 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7816 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7819 class LUInstanceFailover(LogicalUnit):
7820 """Failover an instance.
7823 HPATH = "instance-failover"
7824 HTYPE = constants.HTYPE_INSTANCE
7827 def CheckArguments(self):
7828 """Check the arguments.
7831 self.iallocator = getattr(self.op, "iallocator", None)
7832 self.target_node = getattr(self.op, "target_node", None)
7834 def ExpandNames(self):
7835 self._ExpandAndLockInstance()
7836 _ExpandNamesForMigration(self)
7839 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7840 self.op.ignore_consistency, True,
7841 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7843 self.tasklets = [self._migrater]
7845 def DeclareLocks(self, level):
7846 _DeclareLocksForMigration(self, level)
7848 def BuildHooksEnv(self):
7851 This runs on master, primary and secondary nodes of the instance.
7854 instance = self._migrater.instance
7855 source_node = instance.primary_node
7856 target_node = self.op.target_node
7858 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7859 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7860 "OLD_PRIMARY": source_node,
7861 "NEW_PRIMARY": target_node,
7864 if instance.disk_template in constants.DTS_INT_MIRROR:
7865 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7866 env["NEW_SECONDARY"] = source_node
7868 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7870 env.update(_BuildInstanceHookEnvByObject(self, instance))
7874 def BuildHooksNodes(self):
7875 """Build hooks nodes.
7878 instance = self._migrater.instance
7879 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7880 return (nl, nl + [instance.primary_node])
7883 class LUInstanceMigrate(LogicalUnit):
7884 """Migrate an instance.
7886 This is migration without shutting down, compared to the failover,
7887 which is done with shutdown.
7890 HPATH = "instance-migrate"
7891 HTYPE = constants.HTYPE_INSTANCE
7894 def ExpandNames(self):
7895 self._ExpandAndLockInstance()
7896 _ExpandNamesForMigration(self)
7899 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7900 False, self.op.allow_failover, False,
7901 self.op.allow_runtime_changes,
7902 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7903 self.op.ignore_ipolicy)
7905 self.tasklets = [self._migrater]
7907 def DeclareLocks(self, level):
7908 _DeclareLocksForMigration(self, level)
7910 def BuildHooksEnv(self):
7913 This runs on master, primary and secondary nodes of the instance.
7916 instance = self._migrater.instance
7917 source_node = instance.primary_node
7918 target_node = self.op.target_node
7919 env = _BuildInstanceHookEnvByObject(self, instance)
7921 "MIGRATE_LIVE": self._migrater.live,
7922 "MIGRATE_CLEANUP": self.op.cleanup,
7923 "OLD_PRIMARY": source_node,
7924 "NEW_PRIMARY": target_node,
7925 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7928 if instance.disk_template in constants.DTS_INT_MIRROR:
7929 env["OLD_SECONDARY"] = target_node
7930 env["NEW_SECONDARY"] = source_node
7932 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7936 def BuildHooksNodes(self):
7937 """Build hooks nodes.
7940 instance = self._migrater.instance
7941 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7942 return (nl, nl + [instance.primary_node])
7945 class LUInstanceMove(LogicalUnit):
7946 """Move an instance by data-copying.
7949 HPATH = "instance-move"
7950 HTYPE = constants.HTYPE_INSTANCE
7953 def ExpandNames(self):
7954 self._ExpandAndLockInstance()
7955 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7956 self.op.target_node = target_node
7957 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7958 self.needed_locks[locking.LEVEL_NODE_RES] = []
7959 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7961 def DeclareLocks(self, level):
7962 if level == locking.LEVEL_NODE:
7963 self._LockInstancesNodes(primary_only=True)
7964 elif level == locking.LEVEL_NODE_RES:
7966 self.needed_locks[locking.LEVEL_NODE_RES] = \
7967 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7969 def BuildHooksEnv(self):
7972 This runs on master, primary and secondary nodes of the instance.
7976 "TARGET_NODE": self.op.target_node,
7977 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7979 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7982 def BuildHooksNodes(self):
7983 """Build hooks nodes.
7987 self.cfg.GetMasterNode(),
7988 self.instance.primary_node,
7989 self.op.target_node,
7993 def CheckPrereq(self):
7994 """Check prerequisites.
7996 This checks that the instance is in the cluster.
7999 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8000 assert self.instance is not None, \
8001 "Cannot retrieve locked instance %s" % self.op.instance_name
8003 node = self.cfg.GetNodeInfo(self.op.target_node)
8004 assert node is not None, \
8005 "Cannot retrieve locked node %s" % self.op.target_node
8007 self.target_node = target_node = node.name
8009 if target_node == instance.primary_node:
8010 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8011 (instance.name, target_node),
8014 bep = self.cfg.GetClusterInfo().FillBE(instance)
8016 for idx, dsk in enumerate(instance.disks):
8017 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8018 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8019 " cannot copy" % idx, errors.ECODE_STATE)
8021 _CheckNodeOnline(self, target_node)
8022 _CheckNodeNotDrained(self, target_node)
8023 _CheckNodeVmCapable(self, target_node)
8024 cluster = self.cfg.GetClusterInfo()
8025 group_info = self.cfg.GetNodeGroup(node.group)
8026 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8027 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8028 ignore=self.op.ignore_ipolicy)
8030 if instance.admin_state == constants.ADMINST_UP:
8031 # check memory requirements on the secondary node
8032 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8033 instance.name, bep[constants.BE_MAXMEM],
8034 instance.hypervisor)
8036 self.LogInfo("Not checking memory on the secondary node as"
8037 " instance will not be started")
8039 # check bridge existence
8040 _CheckInstanceBridgesExist(self, instance, node=target_node)
8042 def Exec(self, feedback_fn):
8043 """Move an instance.
8045 The move is done by shutting it down on its present node, copying
8046 the data over (slow) and starting it on the new node.
8049 instance = self.instance
8051 source_node = instance.primary_node
8052 target_node = self.target_node
8054 self.LogInfo("Shutting down instance %s on source node %s",
8055 instance.name, source_node)
8057 assert (self.owned_locks(locking.LEVEL_NODE) ==
8058 self.owned_locks(locking.LEVEL_NODE_RES))
8060 result = self.rpc.call_instance_shutdown(source_node, instance,
8061 self.op.shutdown_timeout)
8062 msg = result.fail_msg
8064 if self.op.ignore_consistency:
8065 self.proc.LogWarning("Could not shutdown instance %s on node %s."
8066 " Proceeding anyway. Please make sure node"
8067 " %s is down. Error details: %s",
8068 instance.name, source_node, source_node, msg)
8070 raise errors.OpExecError("Could not shutdown instance %s on"
8072 (instance.name, source_node, msg))
8074 # create the target disks
8076 _CreateDisks(self, instance, target_node=target_node)
8077 except errors.OpExecError:
8078 self.LogWarning("Device creation failed, reverting...")
8080 _RemoveDisks(self, instance, target_node=target_node)
8082 self.cfg.ReleaseDRBDMinors(instance.name)
8085 cluster_name = self.cfg.GetClusterInfo().cluster_name
8088 # activate, get path, copy the data over
8089 for idx, disk in enumerate(instance.disks):
8090 self.LogInfo("Copying data for disk %d", idx)
8091 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8092 instance.name, True, idx)
8094 self.LogWarning("Can't assemble newly created disk %d: %s",
8095 idx, result.fail_msg)
8096 errs.append(result.fail_msg)
8098 dev_path = result.payload
8099 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8100 target_node, dev_path,
8103 self.LogWarning("Can't copy data over for disk %d: %s",
8104 idx, result.fail_msg)
8105 errs.append(result.fail_msg)
8109 self.LogWarning("Some disks failed to copy, aborting")
8111 _RemoveDisks(self, instance, target_node=target_node)
8113 self.cfg.ReleaseDRBDMinors(instance.name)
8114 raise errors.OpExecError("Errors during disk copy: %s" %
8117 instance.primary_node = target_node
8118 self.cfg.Update(instance, feedback_fn)
8120 self.LogInfo("Removing the disks on the original node")
8121 _RemoveDisks(self, instance, target_node=source_node)
8123 # Only start the instance if it's marked as up
8124 if instance.admin_state == constants.ADMINST_UP:
8125 self.LogInfo("Starting instance %s on node %s",
8126 instance.name, target_node)
8128 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8129 ignore_secondaries=True)
8131 _ShutdownInstanceDisks(self, instance)
8132 raise errors.OpExecError("Can't activate the instance's disks")
8134 result = self.rpc.call_instance_start(target_node,
8135 (instance, None, None), False)
8136 msg = result.fail_msg
8138 _ShutdownInstanceDisks(self, instance)
8139 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8140 (instance.name, target_node, msg))
8143 class LUNodeMigrate(LogicalUnit):
8144 """Migrate all instances from a node.
8147 HPATH = "node-migrate"
8148 HTYPE = constants.HTYPE_NODE
8151 def CheckArguments(self):
8154 def ExpandNames(self):
8155 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8157 self.share_locks = _ShareAll()
8158 self.needed_locks = {
8159 locking.LEVEL_NODE: [self.op.node_name],
8162 def BuildHooksEnv(self):
8165 This runs on the master, the primary and all the secondaries.
8169 "NODE_NAME": self.op.node_name,
8170 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8173 def BuildHooksNodes(self):
8174 """Build hooks nodes.
8177 nl = [self.cfg.GetMasterNode()]
8180 def CheckPrereq(self):
8183 def Exec(self, feedback_fn):
8184 # Prepare jobs for migration instances
8185 allow_runtime_changes = self.op.allow_runtime_changes
8187 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8190 iallocator=self.op.iallocator,
8191 target_node=self.op.target_node,
8192 allow_runtime_changes=allow_runtime_changes,
8193 ignore_ipolicy=self.op.ignore_ipolicy)]
8194 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
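# Added note: one single-opcode OpInstanceMigrate job is built per primary
# instance on the evacuated node, so each migration is submitted and tracked
# as its own job and one failing instance does not abort the others.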
8196 # TODO: Run iallocator in this opcode and pass correct placement options to
8197 # OpInstanceMigrate. Since other jobs can modify the cluster between
8198 # running the iallocator and the actual migration, a good consistency model
8199 # will have to be found.
8201 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8202 frozenset([self.op.node_name]))
8204 return ResultWithJobs(jobs)
8207 class TLMigrateInstance(Tasklet):
8208 """Tasklet class for instance migration.
8211 @ivar live: whether the migration will be done live or non-live;
8212 this variable is initialized only after CheckPrereq has run
8213 @type cleanup: boolean
8214 @ivar cleanup: Whether we clean up after a failed migration
8215 @type iallocator: string
8216 @ivar iallocator: The iallocator used to determine target_node
8217 @type target_node: string
8218 @ivar target_node: If given, the target_node to reallocate the instance to
8219 @type failover: boolean
8220 @ivar failover: Whether operation results in failover or migration
8221 @type fallback: boolean
8222 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8224 @type ignore_consistency: boolean
8225 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
8227 @type shutdown_timeout: int
8228 @ivar shutdown_timeout: In case of failover, the timeout for the instance shutdown
8229 @type ignore_ipolicy: bool
8230 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8235 _MIGRATION_POLL_INTERVAL = 1 # seconds
8236 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8238 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8239 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8241 """Initializes this class.
8244 Tasklet.__init__(self, lu)
8247 self.instance_name = instance_name
8248 self.cleanup = cleanup
8249 self.live = False # will be overridden later
8250 self.failover = failover
8251 self.fallback = fallback
8252 self.ignore_consistency = ignore_consistency
8253 self.shutdown_timeout = shutdown_timeout
8254 self.ignore_ipolicy = ignore_ipolicy
8255 self.allow_runtime_changes = allow_runtime_changes
8257 def CheckPrereq(self):
8258 """Check prerequisites.
8260 This checks that the instance is in the cluster.
8263 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8264 instance = self.cfg.GetInstanceInfo(instance_name)
8265 assert instance is not None
8266 self.instance = instance
8267 cluster = self.cfg.GetClusterInfo()
8269 if (not self.cleanup and
8270 not instance.admin_state == constants.ADMINST_UP and
8271 not self.failover and self.fallback):
8272 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8273 " switching to failover")
8274 self.failover = True
8276 if instance.disk_template not in constants.DTS_MIRRORED:
8281 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8282 " %s" % (instance.disk_template, text),
8285 if instance.disk_template in constants.DTS_EXT_MIRROR:
8286 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8288 if self.lu.op.iallocator:
8289 self._RunAllocator()
8291 # We set self.target_node as it is required by BuildHooksEnv
8293 self.target_node = self.lu.op.target_node
8295 # Check that the target node is correct in terms of instance policy
8296 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8297 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8298 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8300 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8301 ignore=self.ignore_ipolicy)
8303 # self.target_node is already populated, either directly or by the iallocator run
8305 target_node = self.target_node
8306 if self.target_node == instance.primary_node:
8307 raise errors.OpPrereqError("Cannot migrate instance %s"
8308 " to its primary (%s)" %
8309 (instance.name, instance.primary_node),
8312 if len(self.lu.tasklets) == 1:
8313 # It is safe to release locks only when we're the only tasklet
8315 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8316 keep=[instance.primary_node, self.target_node])
8319 secondary_nodes = instance.secondary_nodes
8320 if not secondary_nodes:
8321 raise errors.ConfigurationError("No secondary node but using"
8322 " %s disk template" %
8323 instance.disk_template)
8324 target_node = secondary_nodes[0]
8325 if self.lu.op.iallocator or (self.lu.op.target_node and
8326 self.lu.op.target_node != target_node):
8328 text = "failed over"
8331 raise errors.OpPrereqError("Instances with disk template %s cannot"
8332 " be %s to arbitrary nodes"
8333 " (neither an iallocator nor a target"
8334 " node can be passed)" %
8335 (instance.disk_template, text),
8337 nodeinfo = self.cfg.GetNodeInfo(target_node)
8338 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8339 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8341 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8342 ignore=self.ignore_ipolicy)
8344 i_be = cluster.FillBE(instance)
8346 # check memory requirements on the secondary node
8347 if (not self.cleanup and
8348 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8349 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8350 "migrating instance %s" %
8352 i_be[constants.BE_MINMEM],
8353 instance.hypervisor)
8355 self.lu.LogInfo("Not checking memory on the secondary node as"
8356 " instance will not be started")
8358 # check if failover must be forced instead of migration
8359 if (not self.cleanup and not self.failover and
8360 i_be[constants.BE_ALWAYS_FAILOVER]):
8361 self.lu.LogInfo("Instance configured to always failover; fallback"
8363 self.failover = True
8365 # check bridge existence
8366 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8368 if not self.cleanup:
8369 _CheckNodeNotDrained(self.lu, target_node)
8370 if not self.failover:
8371 result = self.rpc.call_instance_migratable(instance.primary_node,
8373 if result.fail_msg and self.fallback:
8374 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8376 self.failover = True
8378 result.Raise("Can't migrate, please use failover",
8379 prereq=True, ecode=errors.ECODE_STATE)
8381 assert not (self.failover and self.cleanup)
8383 if not self.failover:
8384 if self.lu.op.live is not None and self.lu.op.mode is not None:
8385 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8386 " parameters are accepted",
8388 if self.lu.op.live is not None:
8390 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8392 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8393 # reset the 'live' parameter to None so that repeated
8394 # invocations of CheckPrereq do not raise an exception
8395 self.lu.op.live = None
8396 elif self.lu.op.mode is None:
8397 # read the default value from the hypervisor
8398 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8399 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8401 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8403 # Failover is never live
8406 if not (self.failover or self.cleanup):
8407 remote_info = self.rpc.call_instance_info(instance.primary_node,
8409 instance.hypervisor)
8410 remote_info.Raise("Error checking instance on node %s" %
8411 instance.primary_node)
8412 instance_running = bool(remote_info.payload)
8413 if instance_running:
8414 self.current_mem = int(remote_info.payload["memory"])
8416 def _RunAllocator(self):
8417 """Run the allocator based on input opcode.
8420 # FIXME: add a self.ignore_ipolicy option
8421 req = iallocator.IAReqRelocate(name=self.instance_name,
8422 relocate_from=[self.instance.primary_node])
8423 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8425 ial.Run(self.lu.op.iallocator)
8428 raise errors.OpPrereqError("Can't compute nodes using"
8429 " iallocator '%s': %s" %
8430 (self.lu.op.iallocator, ial.info),
8432 self.target_node = ial.result[0]
8433 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8434 self.instance_name, self.lu.op.iallocator,
8435 utils.CommaJoin(ial.result))
8437 def _WaitUntilSync(self):
8438 """Poll with custom rpc for disk sync.
8440 This uses our own step-based rpc call.
8443 self.feedback_fn("* wait until resync is done")
8447 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8449 (self.instance.disks,
8452 for node, nres in result.items():
8453 nres.Raise("Cannot resync disks on node %s" % node)
8454 node_done, node_percent = nres.payload
8455 all_done = all_done and node_done
8456 if node_percent is not None:
8457 min_percent = min(min_percent, node_percent)
8459 if min_percent < 100:
8460 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8463 def _EnsureSecondary(self, node):
8464 """Demote a node to secondary.
8467 self.feedback_fn("* switching node %s to secondary mode" % node)
8469 for dev in self.instance.disks:
8470 self.cfg.SetDiskID(dev, node)
8472 result = self.rpc.call_blockdev_close(node, self.instance.name,
8473 self.instance.disks)
8474 result.Raise("Cannot change disk to secondary on node %s" % node)
8476 def _GoStandalone(self):
8477 """Disconnect from the network.
8480 self.feedback_fn("* changing into standalone mode")
8481 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8482 self.instance.disks)
8483 for node, nres in result.items():
8484 nres.Raise("Cannot disconnect disks node %s" % node)
8486 def _GoReconnect(self, multimaster):
8487 """Reconnect to the network.
8493 msg = "single-master"
8494 self.feedback_fn("* changing disks into %s mode" % msg)
8495 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8496 (self.instance.disks, self.instance),
8497 self.instance.name, multimaster)
8498 for node, nres in result.items():
8499 nres.Raise("Cannot change disks config on node %s" % node)
8501 def _ExecCleanup(self):
8502 """Try to cleanup after a failed migration.
8504 The cleanup is done by:
8505 - check that the instance is running only on one node
8506 (and update the config if needed)
8507 - change disks on its secondary node to secondary
8508 - wait until disks are fully synchronized
8509 - disconnect from the network
8510 - change disks into single-master mode
8511 - wait again until disks are fully synchronized
8514 instance = self.instance
8515 target_node = self.target_node
8516 source_node = self.source_node
8518 # check running on only one node
8519 self.feedback_fn("* checking where the instance actually runs"
8520 " (if this hangs, the hypervisor might be in"
8522 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8523 for node, result in ins_l.items():
8524 result.Raise("Can't contact node %s" % node)
8526 runningon_source = instance.name in ins_l[source_node].payload
8527 runningon_target = instance.name in ins_l[target_node].payload
8529 if runningon_source and runningon_target:
8530 raise errors.OpExecError("Instance seems to be running on two nodes,"
8531 " or the hypervisor is confused; you will have"
8532 " to ensure manually that it runs only on one"
8533 " and restart this operation")
8535 if not (runningon_source or runningon_target):
8536 raise errors.OpExecError("Instance does not seem to be running at all;"
8537 " in this case it's safer to repair by"
8538 " running 'gnt-instance stop' to ensure disk"
8539 " shutdown, and then restarting it")
8541 if runningon_target:
8542 # the migration has actually succeeded, we need to update the config
8543 self.feedback_fn("* instance running on secondary node (%s),"
8544 " updating config" % target_node)
8545 instance.primary_node = target_node
8546 self.cfg.Update(instance, self.feedback_fn)
8547 demoted_node = source_node
8549 self.feedback_fn("* instance confirmed to be running on its"
8550 " primary node (%s)" % source_node)
8551 demoted_node = target_node
8553 if instance.disk_template in constants.DTS_INT_MIRROR:
8554 self._EnsureSecondary(demoted_node)
8556 self._WaitUntilSync()
8557 except errors.OpExecError:
8558 # we ignore errors here, since if the device is standalone, it
8559 # won't be able to sync
8561 self._GoStandalone()
8562 self._GoReconnect(False)
8563 self._WaitUntilSync()
8565 self.feedback_fn("* done")
8567 def _RevertDiskStatus(self):
8568 """Try to revert the disk status after a failed migration.
8571 target_node = self.target_node
8572 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8576 self._EnsureSecondary(target_node)
8577 self._GoStandalone()
8578 self._GoReconnect(False)
8579 self._WaitUntilSync()
8580 except errors.OpExecError, err:
8581 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8582 " please try to recover the instance manually;"
8583 " error '%s'" % str(err))
8585 def _AbortMigration(self):
8586 """Call the hypervisor code to abort a started migration.
8589 instance = self.instance
8590 target_node = self.target_node
8591 source_node = self.source_node
8592 migration_info = self.migration_info
8594 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8598 abort_msg = abort_result.fail_msg
8600 logging.error("Aborting migration failed on target node %s: %s",
8601 target_node, abort_msg)
8602 # Don't raise an exception here, as we still have to try to revert the
8603 # disk status, even if this step failed.
8605 abort_result = self.rpc.call_instance_finalize_migration_src(
8606 source_node, instance, False, self.live)
8607 abort_msg = abort_result.fail_msg
8609 logging.error("Aborting migration failed on source node %s: %s",
8610 source_node, abort_msg)
8612 def _ExecMigration(self):
8613 """Migrate an instance.
8615 The migrate is done by:
8616 - change the disks into dual-master mode
8617 - wait until disks are fully synchronized again
8618 - migrate the instance
8619 - change disks on the new secondary node (the old primary) to secondary
8620 - wait until disks are fully synchronized
8621 - change disks into single-master mode
8624 instance = self.instance
8625 target_node = self.target_node
8626 source_node = self.source_node
8628 # Check for hypervisor version mismatch and warn the user.
8629 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8630 None, [self.instance.hypervisor])
8631 for ninfo in nodeinfo.values():
8632 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8634 (_, _, (src_info, )) = nodeinfo[source_node].payload
8635 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8637 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8638 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8639 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8640 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8641 if src_version != dst_version:
8642 self.feedback_fn("* warning: hypervisor version mismatch between"
8643 " source (%s) and target (%s) node" %
8644 (src_version, dst_version))
8646 self.feedback_fn("* checking disk consistency between source and target")
8647 for (idx, dev) in enumerate(instance.disks):
8648 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8649 raise errors.OpExecError("Disk %s is degraded or not fully"
8650 " synchronized on target node,"
8651 " aborting migration" % idx)
8653 if self.current_mem > self.tgt_free_mem:
8654 if not self.allow_runtime_changes:
8655 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8656 " free memory to fit instance %s on target"
8657 " node %s (have %dMB, need %dMB)" %
8658 (instance.name, target_node,
8659 self.tgt_free_mem, self.current_mem))
8660 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8661 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8664 rpcres.Raise("Cannot modify instance runtime memory")
8666 # First get the migration information from the remote node
8667 result = self.rpc.call_migration_info(source_node, instance)
8668 msg = result.fail_msg
8670 log_err = ("Failed fetching source migration information from %s: %s" %
8672 logging.error(log_err)
8673 raise errors.OpExecError(log_err)
8675 self.migration_info = migration_info = result.payload
8677 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8678 # Then switch the disks to master/master mode
8679 self._EnsureSecondary(target_node)
8680 self._GoStandalone()
8681 self._GoReconnect(True)
8682 self._WaitUntilSync()
8684 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8685 result = self.rpc.call_accept_instance(target_node,
8688 self.nodes_ip[target_node])
8690 msg = result.fail_msg
8692 logging.error("Instance pre-migration failed, trying to revert"
8693 " disk status: %s", msg)
8694 self.feedback_fn("Pre-migration failed, aborting")
8695 self._AbortMigration()
8696 self._RevertDiskStatus()
8697 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8698 (instance.name, msg))
8700 self.feedback_fn("* migrating instance to %s" % target_node)
8701 result = self.rpc.call_instance_migrate(source_node, instance,
8702 self.nodes_ip[target_node],
8704 msg = result.fail_msg
8706 logging.error("Instance migration failed, trying to revert"
8707 " disk status: %s", msg)
8708 self.feedback_fn("Migration failed, aborting")
8709 self._AbortMigration()
8710 self._RevertDiskStatus()
8711 raise errors.OpExecError("Could not migrate instance %s: %s" %
8712 (instance.name, msg))
8714 self.feedback_fn("* starting memory transfer")
8715 last_feedback = time.time()
8717 result = self.rpc.call_instance_get_migration_status(source_node,
8719 msg = result.fail_msg
8720 ms = result.payload # MigrationStatus instance
8721 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8722 logging.error("Instance migration failed, trying to revert"
8723 " disk status: %s", msg)
8724 self.feedback_fn("Migration failed, aborting")
8725 self._AbortMigration()
8726 self._RevertDiskStatus()
8728 msg = "hypervisor returned failure"
8729 raise errors.OpExecError("Could not migrate instance %s: %s" %
8730 (instance.name, msg))
8732 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8733 self.feedback_fn("* memory transfer complete")
8736 if (utils.TimeoutExpired(last_feedback,
8737 self._MIGRATION_FEEDBACK_INTERVAL) and
8738 ms.transferred_ram is not None):
8739 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8740 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8741 last_feedback = time.time()
8743 time.sleep(self._MIGRATION_POLL_INTERVAL)
8745 result = self.rpc.call_instance_finalize_migration_src(source_node,
8749 msg = result.fail_msg
8751 logging.error("Instance migration succeeded, but finalization failed"
8752 " on the source node: %s", msg)
8753 raise errors.OpExecError("Could not finalize instance migration: %s" %
8756 instance.primary_node = target_node
8758 # distribute new instance config to the other nodes
8759 self.cfg.Update(instance, self.feedback_fn)
8761 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8765 msg = result.fail_msg
8767 logging.error("Instance migration succeeded, but finalization failed"
8768 " on the target node: %s", msg)
8769 raise errors.OpExecError("Could not finalize instance migration: %s" %
8772 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8773 self._EnsureSecondary(source_node)
8774 self._WaitUntilSync()
8775 self._GoStandalone()
8776 self._GoReconnect(False)
8777 self._WaitUntilSync()
8779 # If the instance's disk template is `rbd' and there was a successful
8780 # migration, unmap the device from the source node.
8781 if self.instance.disk_template == constants.DT_RBD:
8782 disks = _ExpandCheckDisks(instance, instance.disks)
8783 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8785 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8786 msg = result.fail_msg
8788 logging.error("Migration was successful, but couldn't unmap the"
8789 " block device %s on source node %s: %s",
8790 disk.iv_name, source_node, msg)
8791 logging.error("You need to unmap the device %s manually on %s",
8792 disk.iv_name, source_node)
8794 self.feedback_fn("* done")
8796 def _ExecFailover(self):
8797 """Failover an instance.
8799 The failover is done by shutting it down on its present node and
8800 starting it on the secondary.
8803 instance = self.instance
8804 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8806 source_node = instance.primary_node
8807 target_node = self.target_node
8809 if instance.admin_state == constants.ADMINST_UP:
8810 self.feedback_fn("* checking disk consistency between source and target")
8811 for (idx, dev) in enumerate(instance.disks):
8812 # for drbd, these are drbd over lvm
8813 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8815 if primary_node.offline:
8816 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8818 (primary_node.name, idx, target_node))
8819 elif not self.ignore_consistency:
8820 raise errors.OpExecError("Disk %s is degraded on target node,"
8821 " aborting failover" % idx)
8823 self.feedback_fn("* not checking disk consistency as instance is not"
8826 self.feedback_fn("* shutting down instance on source node")
8827 logging.info("Shutting down instance %s on node %s",
8828 instance.name, source_node)
8830 result = self.rpc.call_instance_shutdown(source_node, instance,
8831 self.shutdown_timeout)
8832 msg = result.fail_msg
8834 if self.ignore_consistency or primary_node.offline:
8835 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8836 " proceeding anyway; please make sure node"
8837 " %s is down; error details: %s",
8838 instance.name, source_node, source_node, msg)
8840 raise errors.OpExecError("Could not shutdown instance %s on"
8842 (instance.name, source_node, msg))
8844 self.feedback_fn("* deactivating the instance's disks on source node")
8845 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8846 raise errors.OpExecError("Can't shut down the instance's disks")
8848 instance.primary_node = target_node
8849 # distribute new instance config to the other nodes
8850 self.cfg.Update(instance, self.feedback_fn)
8852 # Only start the instance if it's marked as up
8853 if instance.admin_state == constants.ADMINST_UP:
8854 self.feedback_fn("* activating the instance's disks on target node %s" %
8856 logging.info("Starting instance %s on node %s",
8857 instance.name, target_node)
8859 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8860 ignore_secondaries=True)
8862 _ShutdownInstanceDisks(self.lu, instance)
8863 raise errors.OpExecError("Can't activate the instance's disks")
8865 self.feedback_fn("* starting the instance on the target node %s" %
8867 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8869 msg = result.fail_msg
8871 _ShutdownInstanceDisks(self.lu, instance)
8872 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8873 (instance.name, target_node, msg))
8875 def Exec(self, feedback_fn):
8876 """Perform the migration.
8879 self.feedback_fn = feedback_fn
8880 self.source_node = self.instance.primary_node
8882 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8883 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8884 self.target_node = self.instance.secondary_nodes[0]
8885 # Otherwise self.target_node has been populated either
8886 # directly, or through an iallocator.
8888 self.all_nodes = [self.source_node, self.target_node]
8889 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8890 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8893 feedback_fn("Failover instance %s" % self.instance.name)
8894 self._ExecFailover()
8896 feedback_fn("Migrating instance %s" % self.instance.name)
8899 return self._ExecCleanup()
8901 return self._ExecMigration()
8904 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8906 """Wrapper around L{_CreateBlockDevInner}.
8908 This method annotates the root device first.
8911 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8912 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8916 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8918 """Create a tree of block devices on a given node.
8920 If this device type has to be created on secondaries, create it and all its children.
8923 If not, just recurse to children keeping the same 'force' value.
8925 @attention: The device has to be annotated already.
8927 @param lu: the lu on whose behalf we execute
8928 @param node: the node on which to create the device
8929 @type instance: L{objects.Instance}
8930 @param instance: the instance which owns the device
8931 @type device: L{objects.Disk}
8932 @param device: the device to create
8933 @type force_create: boolean
8934 @param force_create: whether to force creation of this device; this
8935 will be changed to True whenever we find a device which has
8936 CreateOnSecondary() attribute
8937 @param info: the extra 'metadata' we should attach to the device
8938 (this will be represented as a LVM tag)
8939 @type force_open: boolean
8940 @param force_open: this parameter will be passed to the
8941 L{backend.BlockdevCreate} function where it specifies
8942 whether we run on primary or not, and it affects both
8943 the child assembly and the device's own Open() execution
8946 if device.CreateOnSecondary():
8950 for child in device.children:
8951 _CreateBlockDevInner(lu, node, instance, child, force_create,
8954 if not force_create:
8957 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8960 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8961 """Create a single block device on a given node.
8963 This will not recurse over children of the device, so they must be created in advance.
8966 @param lu: the lu on whose behalf we execute
8967 @param node: the node on which to create the device
8968 @type instance: L{objects.Instance}
8969 @param instance: the instance which owns the device
8970 @type device: L{objects.Disk}
8971 @param device: the device to create
8972 @param info: the extra 'metadata' we should attach to the device
8973 (this will be represented as a LVM tag)
8974 @type force_open: boolean
8975 @param force_open: this parameter will be passed to the
8976 L{backend.BlockdevCreate} function where it specifies
8977 whether we run on primary or not, and it affects both
8978 the child assembly and the device's own Open() execution
8981 lu.cfg.SetDiskID(device, node)
8982 result = lu.rpc.call_blockdev_create(node, device, device.size,
8983 instance.name, force_open, info)
8984 result.Raise("Can't create block device %s on"
8985 " node %s for instance %s" % (device, node, instance.name))
8986 if device.physical_id is None:
8987 device.physical_id = result.payload
8990 def _GenerateUniqueNames(lu, exts):
8991 """Generate a suitable LV name.
8993 This will generate a logical volume name for the given instance.
8998 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8999 results.append("%s%s" % (new_id, val))
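# Illustrative sketch (added, hypothetical IDs): for a list of extensions such
# as [".disk0", ".disk1"], each extension is appended to a freshly generated
# unique ID, e.g.
#   _GenerateUniqueNames(lu, [".disk%d" % i for i in range(2)])
#   -> ["3b0f5c1e.disk0", "9d41aa72.disk1"]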
9003 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9004 iv_name, p_minor, s_minor):
9005 """Generate a drbd8 device complete with its children.
9008 assert len(vgnames) == len(names) == 2
9009 port = lu.cfg.AllocatePort()
9010 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9012 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9013 logical_id=(vgnames[0], names[0]),
9015 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9016 size=constants.DRBD_META_SIZE,
9017 logical_id=(vgnames[1], names[1]),
9019 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9020 logical_id=(primary, secondary, port,
9023 children=[dev_data, dev_meta],
9024 iv_name=iv_name, params={})
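# Added note: the result, as built above, is a single LD_DRBD8 disk whose two
# LV children hold the data (size MB on vgnames[0]) and the DRBD metadata
# (DRBD_META_SIZE MB on vgnames[1]); its logical_id ties together the
# primary/secondary nodes, the allocated port, both minors and the generated
# shared secret.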
9028 _DISK_TEMPLATE_NAME_PREFIX = {
9029 constants.DT_PLAIN: "",
9030 constants.DT_RBD: ".rbd",
9034 _DISK_TEMPLATE_DEVICE_TYPE = {
9035 constants.DT_PLAIN: constants.LD_LV,
9036 constants.DT_FILE: constants.LD_FILE,
9037 constants.DT_SHARED_FILE: constants.LD_FILE,
9038 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9039 constants.DT_RBD: constants.LD_RBD,
9043 def _GenerateDiskTemplate(
9044 lu, template_name, instance_name, primary_node, secondary_nodes,
9045 disk_info, file_storage_dir, file_driver, base_index,
9046 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9047 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9048 """Generate the entire disk layout for a given template type.
9051 #TODO: compute space requirements
9053 vgname = lu.cfg.GetVGName()
9054 disk_count = len(disk_info)
9057 if template_name == constants.DT_DISKLESS:
9059 elif template_name == constants.DT_DRBD8:
9060 if len(secondary_nodes) != 1:
9061 raise errors.ProgrammerError("Wrong template configuration")
9062 remote_node = secondary_nodes[0]
9063 minors = lu.cfg.AllocateDRBDMinor(
9064 [primary_node, remote_node] * len(disk_info), instance_name)
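# Added note: AllocateDRBDMinor is called with the (primary, secondary) node
# pair repeated once per disk, so minors[idx * 2] and minors[idx * 2 + 1]
# below are the minors of disk idx on the primary and secondary node
# respectively.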
9066 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9068 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9071 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9072 for i in range(disk_count)]):
9073 names.append(lv_prefix + "_data")
9074 names.append(lv_prefix + "_meta")
9075 for idx, disk in enumerate(disk_info):
9076 disk_index = idx + base_index
9077 data_vg = disk.get(constants.IDISK_VG, vgname)
9078 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9079 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9080 disk[constants.IDISK_SIZE],
9082 names[idx * 2:idx * 2 + 2],
9083 "disk/%d" % disk_index,
9084 minors[idx * 2], minors[idx * 2 + 1])
9085 disk_dev.mode = disk[constants.IDISK_MODE]
9086 disks.append(disk_dev)
9089 raise errors.ProgrammerError("Wrong template configuration")
9091 if template_name == constants.DT_FILE:
9093 elif template_name == constants.DT_SHARED_FILE:
9094 _req_shr_file_storage()
9096 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9097 if name_prefix is None:
9100 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9101 (name_prefix, base_index + i)
9102 for i in range(disk_count)])
9104 if template_name == constants.DT_PLAIN:
9106 def logical_id_fn(idx, _, disk):
9107 vg = disk.get(constants.IDISK_VG, vgname)
9108 return (vg, names[idx])
9110 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9112 lambda _, disk_index, disk: (file_driver,
9113 "%s/disk%d" % (file_storage_dir,
9115 elif template_name == constants.DT_BLOCK:
9117 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9118 disk[constants.IDISK_ADOPT])
9119 elif template_name == constants.DT_RBD:
9120 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9122 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9124 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9126 for idx, disk in enumerate(disk_info):
9127 disk_index = idx + base_index
9128 size = disk[constants.IDISK_SIZE]
9129 feedback_fn("* disk %s, size %s" %
9130 (disk_index, utils.FormatUnit(size, "h")))
9131 disks.append(objects.Disk(dev_type=dev_type, size=size,
9132 logical_id=logical_id_fn(idx, disk_index, disk),
9133 iv_name="disk/%d" % disk_index,
9134 mode=disk[constants.IDISK_MODE],
9140 def _GetInstanceInfoText(instance):
9141 """Compute that text that should be added to the disk's metadata.
9144 return "originstname+%s" % instance.name
9147 def _CalcEta(time_taken, written, total_size):
9148 """Calculates the ETA based on size written and total size.
9150 @param time_taken: The time taken so far
9151 @param written: amount written so far
9152 @param total_size: The total size of data to be written
9153 @return: The remaining time in seconds
9156 avg_time = time_taken / float(written)
9157 return (total_size - written) * avg_time
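# Worked example (added, hypothetical numbers): if 1024 MB out of 4096 MB were
# written in 60 seconds, avg_time is 60/1024 s/MB and the remaining time is
# (4096 - 1024) * 60/1024 = 180 seconds, i.e.
#   _CalcEta(60, 1024, 4096)  # -> 180.0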
9160 def _WipeDisks(lu, instance, disks=None):
9161 """Wipes instance disks.
9163 @type lu: L{LogicalUnit}
9164 @param lu: the logical unit on whose behalf we execute
9165 @type instance: L{objects.Instance}
9166 @param instance: the instance whose disks we should create
9167 @return: the success of the wipe
9170 node = instance.primary_node
9173 disks = [(idx, disk, 0)
9174 for (idx, disk) in enumerate(instance.disks)]
9176 for (_, device, _) in disks:
9177 lu.cfg.SetDiskID(device, node)
9179 logging.info("Pausing synchronization of disks of instance '%s'",
9181 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9182 (map(compat.snd, disks),
9185 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9187 for idx, success in enumerate(result.payload):
9189 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9190 " failed", idx, instance.name)
9193 for (idx, device, offset) in disks:
9194 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9195 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9197 int(min(constants.MAX_WIPE_CHUNK,
9198 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
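# Illustrative example (added; assumes MIN_WIPE_CHUNK_PERCENT is 10 and
# MAX_WIPE_CHUNK is 1024 MB, see constants.py for the real values): a
# 20480 MB disk would be wiped in min(1024, 20480 * 10 / 100) = 1024 MB
# chunks, while a 5120 MB disk would use 512 MB chunks.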
9202 start_time = time.time()
9207 info_text = (" (from %s to %s)" %
9208 (utils.FormatUnit(offset, "h"),
9209 utils.FormatUnit(size, "h")))
9211 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9213 logging.info("Wiping disk %d for instance %s on node %s using"
9214 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9216 while offset < size:
9217 wipe_size = min(wipe_chunk_size, size - offset)
9219 logging.debug("Wiping disk %d, offset %s, chunk %s",
9220 idx, offset, wipe_size)
9222 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9224 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9225 (idx, offset, wipe_size))
9229 if now - last_output >= 60:
9230 eta = _CalcEta(now - start_time, offset, size)
9231 lu.LogInfo(" - done: %.1f%% ETA: %s",
9232 offset / float(size) * 100, utils.FormatSeconds(eta))
9235 logging.info("Resuming synchronization of disks for instance '%s'",
9238 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9239 (map(compat.snd, disks),
9244 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9245 node, result.fail_msg)
9247 for idx, success in enumerate(result.payload):
9249 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9250 " failed", idx, instance.name)
9253 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9254 """Create all disks for an instance.
9256 This abstracts away some work from AddInstance.
9258 @type lu: L{LogicalUnit}
9259 @param lu: the logical unit on whose behalf we execute
9260 @type instance: L{objects.Instance}
9261 @param instance: the instance whose disks we should create
9263 @param to_skip: list of indices to skip
9264 @type target_node: string
9265 @param target_node: if passed, overrides the target node for creation
9267 @return: the success of the creation
9270 info = _GetInstanceInfoText(instance)
9271 if target_node is None:
9272 pnode = instance.primary_node
9273 all_nodes = instance.all_nodes
9278 if instance.disk_template in constants.DTS_FILEBASED:
9279 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9280 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9282 result.Raise("Failed to create directory '%s' on"
9283 " node %s" % (file_storage_dir, pnode))
9285 # Note: this needs to be kept in sync with adding of disks in
9286 # LUInstanceSetParams
9287 for idx, device in enumerate(instance.disks):
9288 if to_skip and idx in to_skip:
9290 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9292 for node in all_nodes:
9293 f_create = node == pnode
9294 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9297 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9298 """Remove all disks for an instance.
9300 This abstracts away some work from `AddInstance()` and
9301 `RemoveInstance()`. Note that in case some of the devices couldn't
9302 be removed, the removal will continue with the other ones (compare
9303 with `_CreateDisks()`).
9305 @type lu: L{LogicalUnit}
9306 @param lu: the logical unit on whose behalf we execute
9307 @type instance: L{objects.Instance}
9308 @param instance: the instance whose disks we should remove
9309 @type target_node: string
9310 @param target_node: used to override the node on which to remove the disks
9312 @return: the success of the removal
9315 logging.info("Removing block devices for instance %s", instance.name)
9318 ports_to_release = set()
9319 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9320 for (idx, device) in enumerate(anno_disks):
9322 edata = [(target_node, device)]
9324 edata = device.ComputeNodeTree(instance.primary_node)
9325 for node, disk in edata:
9326 lu.cfg.SetDiskID(disk, node)
9327 result = lu.rpc.call_blockdev_remove(node, disk)
9329 lu.LogWarning("Could not remove disk %s on node %s,"
9330 " continuing anyway: %s", idx, node, result.fail_msg)
9331 if not (result.offline and node != instance.primary_node):
9334 # if this is a DRBD disk, return its port to the pool
9335 if device.dev_type in constants.LDS_DRBD:
9336 ports_to_release.add(device.logical_id[2])
9338 if all_result or ignore_failures:
9339 for port in ports_to_release:
9340 lu.cfg.AddTcpUdpPort(port)
9342 if instance.disk_template in constants.DTS_FILEBASED:
9343 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9347 tgt = instance.primary_node
9348 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9350 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9351 file_storage_dir, instance.primary_node, result.fail_msg)
9357 def _ComputeDiskSizePerVG(disk_template, disks):
9358 """Compute disk size requirements in the volume group
9361 def _compute(disks, payload):
9362 """Universal algorithm.
9367 vgs[disk[constants.IDISK_VG]] = \
9368 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9372 # Required free disk space as a function of disk and swap space
9374 constants.DT_DISKLESS: {},
9375 constants.DT_PLAIN: _compute(disks, 0),
9376 # 128 MB are added for drbd metadata for each disk
9377 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9378 constants.DT_FILE: {},
9379 constants.DT_SHARED_FILE: {},
9382 if disk_template not in req_size_dict:
9383 raise errors.ProgrammerError("Disk template '%s' size requirement"
9384 " is unknown" % disk_template)
9386 return req_size_dict[disk_template]
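# Worked example (illustrative values only): for two disks of 1024 MiB and
# 2048 MiB, both in volume group "xenvg", the helper above returns
#   {"xenvg": 3072}   for constants.DT_PLAIN
#   {"xenvg": 3328}   for constants.DT_DRBD8, assuming DRBD_META_SIZE
#                     is 128 MiB of metadata per disk
# File-based and diskless templates need no volume group space, hence the
# empty dictionaries in req_size_dict.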
9389 def _FilterVmNodes(lu, nodenames):
9390 """Filters out non-vm_capable nodes from a list.
9392 @type lu: L{LogicalUnit}
9393 @param lu: the logical unit for which we check
9394 @type nodenames: list
9395 @param nodenames: the list of nodes on which we should check
9397 @return: the list of vm-capable nodes
9400 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9401 return [name for name in nodenames if name not in non_vm_nodes]
9404 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9405 """Hypervisor parameter validation.
9407 This function abstracts the hypervisor parameter validation to be
9408 used in both instance create and instance modify.
9410 @type lu: L{LogicalUnit}
9411 @param lu: the logical unit for which we check
9412 @type nodenames: list
9413 @param nodenames: the list of nodes on which we should check
9414 @type hvname: string
9415 @param hvname: the name of the hypervisor we should use
9416 @type hvparams: dict
9417 @param hvparams: the parameters which we need to check
9418 @raise errors.OpPrereqError: if the parameters are not valid
9421 nodenames = _FilterVmNodes(lu, nodenames)
9423 cluster = lu.cfg.GetClusterInfo()
9424 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9426 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9427 for node in nodenames:
9431 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9434 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9435 """OS parameters validation.
9437 @type lu: L{LogicalUnit}
9438 @param lu: the logical unit for which we check
9439 @type required: boolean
9440 @param required: whether the validation should fail if the OS is not
9442 @type nodenames: list
9443 @param nodenames: the list of nodes on which we should check
9444 @type osname: string
9445 @param osname: the name of the OS we should use
9446 @type osparams: dict
9447 @param osparams: the parameters which we need to check
9448 @raise errors.OpPrereqError: if the parameters are not valid
9451 nodenames = _FilterVmNodes(lu, nodenames)
9452 result = lu.rpc.call_os_validate(nodenames, required, osname,
9453 [constants.OS_VALIDATE_PARAMETERS],
9455 for node, nres in result.items():
9456 # we don't check for offline cases since this should be run only
9457 # against the master node and/or an instance's nodes
9458 nres.Raise("OS Parameters validation failed on node %s" % node)
9459 if not nres.payload:
9460 lu.LogInfo("OS %s not found on node %s, validation skipped",
9464 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9465 """Wrapper around IAReqInstanceAlloc.
9467 @param op: The instance opcode
9468 @param disks: The computed disks
9469 @param nics: The computed nics
9470 @param beparams: The fully filled beparams
9472 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9475 spindle_use = beparams[constants.BE_SPINDLE_USE]
9476 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9477 disk_template=op.disk_template,
9480 vcpus=beparams[constants.BE_VCPUS],
9481 memory=beparams[constants.BE_MAXMEM],
9482 spindle_use=spindle_use,
9484 nics=[n.ToDict() for n in nics],
9485 hypervisor=op.hypervisor)
9488 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9489 """Computes the nics.
9491 @param op: The instance opcode
9492 @param cluster: Cluster configuration object
9493 @param default_ip: The default ip to assign
9494 @param cfg: An instance of the configuration object
9495 @param proc: The executor instance
9497 @returns: The built-up nics
9502 nic_mode_req = nic.get(constants.INIC_MODE, None)
9503 nic_mode = nic_mode_req
9504 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9505 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9507 net = nic.get(constants.INIC_NETWORK, None)
9508 link = nic.get(constants.NIC_LINK, None)
9509 ip = nic.get(constants.INIC_IP, None)
9511 if net is None or net.lower() == constants.VALUE_NONE:
9514 if nic_mode_req is not None or link is not None:
9515 raise errors.OpPrereqError("If network is given, no mode or link"
9516 " is allowed to be passed",
9519 # ip validity checks
9520 if ip is None or ip.lower() == constants.VALUE_NONE:
9522 elif ip.lower() == constants.VALUE_AUTO:
9523 if not op.name_check:
9524 raise errors.OpPrereqError("IP address set to auto but name checks"
9525 " have been skipped",
9529 # We defer pool operations until later, so that the iallocator has
9530 # filled in the instance's node(s)
9531 if ip.lower() == constants.NIC_IP_POOL:
9533 raise errors.OpPrereqError("if ip=pool, parameter network"
9534 " must be passed too",
9537 elif not netutils.IPAddress.IsValid(ip):
9538 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9543 # TODO: check the ip address for uniqueness
9544 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9545 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9548 # MAC address verification
9549 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9550 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9551 mac = utils.NormalizeAndValidateMac(mac)
9554 # TODO: We need to factor this out
9555 cfg.ReserveMAC(mac, proc.GetECId())
9556 except errors.ReservationError:
9557 raise errors.OpPrereqError("MAC address %s already in use"
9558 " in cluster" % mac,
9559 errors.ECODE_NOTUNIQUE)
9561 # Build nic parameters
9564 nicparams[constants.NIC_MODE] = nic_mode
9566 nicparams[constants.NIC_LINK] = link
9568 check_params = cluster.SimpleFillNIC(nicparams)
9569 objects.NIC.CheckParameterSyntax(check_params)
9570 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9571 network=net, nicparams=nicparams))
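# Hedged examples (not from this module) of opcode NIC dictionaries that
# _ComputeNics accepts, following the rules enforced above:
#
#   {}                                   # mode/link from cluster defaults
#   {"ip": "auto"}                       # IP from the name check result
#   {"network": "net1", "ip": "pool"}    # IP reserved later from the
#                                        # network's address pool
#   {"mode": "routed", "ip": "10.0.0.9"} # routed NIC with an explicit IP
#
# Combining "mode" or "link" with "network", or requesting ip=pool without
# a network, is rejected with OpPrereqError as shown above.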
9576 def _ComputeDisks(op, default_vg):
9577 """Computes the instance disks.
9579 @param op: The instance opcode
9580 @param default_vg: The default_vg to assume
9582 @return: The computed disks
9586 for disk in op.disks:
9587 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9588 if mode not in constants.DISK_ACCESS_SET:
9589 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9590 mode, errors.ECODE_INVAL)
9591 size = disk.get(constants.IDISK_SIZE, None)
9593 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9596 except (TypeError, ValueError):
9597 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9600 data_vg = disk.get(constants.IDISK_VG, default_vg)
9602 constants.IDISK_SIZE: size,
9603 constants.IDISK_MODE: mode,
9604 constants.IDISK_VG: data_vg,
9606 if constants.IDISK_METAVG in disk:
9607 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9608 if constants.IDISK_ADOPT in disk:
9609 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9610 disks.append(new_disk)
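# Illustrative example (hypothetical input, assuming the usual string
# values of the IDISK_* constants): an opcode disk such as
#   {"size": 10240, "mode": "rw", "metavg": "meta0"}
# would be normalized by _ComputeDisks into roughly
#   {"size": 10240, "mode": "rw", "vg": default_vg, "metavg": "meta0"}
# i.e. the access mode is validated, the size coerced to an integer (MiB)
# and the data volume group filled in from default_vg; an optional "adopt"
# key is copied through unchanged.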
9615 def _ComputeFullBeParams(op, cluster):
9616 """Computes the full beparams.
9618 @param op: The instance opcode
9619 @param cluster: The cluster config object
9621 @return: The fully filled beparams
9624 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9625 for param, value in op.beparams.iteritems():
9626 if value == constants.VALUE_AUTO:
9627 op.beparams[param] = default_beparams[param]
9628 objects.UpgradeBeParams(op.beparams)
9629 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9630 return cluster.SimpleFillBE(op.beparams)
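# Hedged sketch (illustrative values): with cluster defaults of, say,
# {"maxmem": 512, "minmem": 512, "vcpus": 1} and an opcode carrying
# {"vcpus": "auto", "maxmem": 1024}, _ComputeFullBeParams replaces the
# "auto" value with the cluster default, upgrades any legacy "memory"
# setting via objects.UpgradeBeParams and returns the dict filled with the
# remaining cluster defaults, e.g. maxmem=1024, minmem=512, vcpus=1.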
9633 class LUInstanceCreate(LogicalUnit):
9634 """Create an instance.
9637 HPATH = "instance-add"
9638 HTYPE = constants.HTYPE_INSTANCE
9641 def CheckArguments(self):
9645 # do not require name_check to ease forward/backward compatibility
9647 if self.op.no_install and self.op.start:
9648 self.LogInfo("No-installation mode selected, disabling startup")
9649 self.op.start = False
9650 # validate/normalize the instance name
9651 self.op.instance_name = \
9652 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9654 if self.op.ip_check and not self.op.name_check:
9655 # TODO: make the ip check more flexible and not depend on the name check
9656 raise errors.OpPrereqError("Cannot do IP address check without a name"
9657 " check", errors.ECODE_INVAL)
9659 # check nics' parameter names
9660 for nic in self.op.nics:
9661 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9663 # check disks. parameter names and consistent adopt/no-adopt strategy
9664 has_adopt = has_no_adopt = False
9665 for disk in self.op.disks:
9666 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9667 if constants.IDISK_ADOPT in disk:
9671 if has_adopt and has_no_adopt:
9672 raise errors.OpPrereqError("Either all disks are adopted or none is",
9675 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9676 raise errors.OpPrereqError("Disk adoption is not supported for the"
9677 " '%s' disk template" %
9678 self.op.disk_template,
9680 if self.op.iallocator is not None:
9681 raise errors.OpPrereqError("Disk adoption not allowed with an"
9682 " iallocator script", errors.ECODE_INVAL)
9683 if self.op.mode == constants.INSTANCE_IMPORT:
9684 raise errors.OpPrereqError("Disk adoption not allowed for"
9685 " instance import", errors.ECODE_INVAL)
9687 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9688 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9689 " but no 'adopt' parameter given" %
9690 self.op.disk_template,
9693 self.adopt_disks = has_adopt
9695 # instance name verification
9696 if self.op.name_check:
9697 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9698 self.op.instance_name = self.hostname1.name
9699 # used in CheckPrereq for ip ping check
9700 self.check_ip = self.hostname1.ip
9702 self.check_ip = None
9704 # file storage checks
9705 if (self.op.file_driver and
9706 self.op.file_driver not in constants.FILE_DRIVER):
9707 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9708 self.op.file_driver, errors.ECODE_INVAL)
9710 if self.op.disk_template == constants.DT_FILE:
9711 opcodes.RequireFileStorage()
9712 elif self.op.disk_template == constants.DT_SHARED_FILE:
9713 opcodes.RequireSharedFileStorage()
9715 ### Node/iallocator related checks
9716 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9718 if self.op.pnode is not None:
9719 if self.op.disk_template in constants.DTS_INT_MIRROR:
9720 if self.op.snode is None:
9721 raise errors.OpPrereqError("The networked disk templates need"
9722 " a mirror node", errors.ECODE_INVAL)
9724 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9726 self.op.snode = None
9728 self._cds = _GetClusterDomainSecret()
9730 if self.op.mode == constants.INSTANCE_IMPORT:
9731 # On import force_variant must be True, because if we forced it at
9732 # initial install, our only chance when importing it back is that it
9734 self.op.force_variant = True
9736 if self.op.no_install:
9737 self.LogInfo("No-installation mode has no effect during import")
9739 elif self.op.mode == constants.INSTANCE_CREATE:
9740 if self.op.os_type is None:
9741 raise errors.OpPrereqError("No guest OS specified",
9743 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9744 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9745 " installation" % self.op.os_type,
9747 if self.op.disk_template is None:
9748 raise errors.OpPrereqError("No disk template specified",
9751 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9752 # Check handshake to ensure both clusters have the same domain secret
9753 src_handshake = self.op.source_handshake
9754 if not src_handshake:
9755 raise errors.OpPrereqError("Missing source handshake",
9758 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9761 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9764 # Load and check source CA
9765 self.source_x509_ca_pem = self.op.source_x509_ca
9766 if not self.source_x509_ca_pem:
9767 raise errors.OpPrereqError("Missing source X509 CA",
9771 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9773 except OpenSSL.crypto.Error, err:
9774 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9775 (err, ), errors.ECODE_INVAL)
9777 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9778 if errcode is not None:
9779 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9782 self.source_x509_ca = cert
9784 src_instance_name = self.op.source_instance_name
9785 if not src_instance_name:
9786 raise errors.OpPrereqError("Missing source instance name",
9789 self.source_instance_name = \
9790 netutils.GetHostname(name=src_instance_name).name
9793 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9794 self.op.mode, errors.ECODE_INVAL)
9796 def ExpandNames(self):
9797 """ExpandNames for CreateInstance.
9799 Figure out the right locks for instance creation.
9802 self.needed_locks = {}
9804 instance_name = self.op.instance_name
9805 # this is just a preventive check, but someone might still add this
9806 # instance in the meantime, and creation will fail at lock-add time
9807 if instance_name in self.cfg.GetInstanceList():
9808 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9809 instance_name, errors.ECODE_EXISTS)
9811 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9813 if self.op.iallocator:
9814 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9815 # specifying a group on instance creation and then selecting nodes from
9817 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9818 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9820 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9821 nodelist = [self.op.pnode]
9822 if self.op.snode is not None:
9823 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9824 nodelist.append(self.op.snode)
9825 self.needed_locks[locking.LEVEL_NODE] = nodelist
9826 # Lock resources of instance's primary and secondary nodes (copy to
9827 # prevent accidental modification)
9828 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9830 # in case of import lock the source node too
9831 if self.op.mode == constants.INSTANCE_IMPORT:
9832 src_node = self.op.src_node
9833 src_path = self.op.src_path
9835 if src_path is None:
9836 self.op.src_path = src_path = self.op.instance_name
9838 if src_node is None:
9839 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9840 self.op.src_node = None
9841 if os.path.isabs(src_path):
9842 raise errors.OpPrereqError("Importing an instance from a path"
9843 " requires a source node option",
9846 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9847 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9848 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9849 if not os.path.isabs(src_path):
9850 self.op.src_path = src_path = \
9851 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9853 def _RunAllocator(self):
9854 """Run the allocator based on input opcode.
9857 #TODO Export network to iallocator so that it chooses a pnode
9858 # in a nodegroup that has the desired network connected to
9859 req = _CreateInstanceAllocRequest(self.op, self.disks,
9860 self.nics, self.be_full)
9861 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9863 ial.Run(self.op.iallocator)
9866 raise errors.OpPrereqError("Can't compute nodes using"
9867 " iallocator '%s': %s" %
9868 (self.op.iallocator, ial.info),
9870 self.op.pnode = ial.result[0]
9871 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9872 self.op.instance_name, self.op.iallocator,
9873 utils.CommaJoin(ial.result))
9875 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9877 if req.RequiredNodes() == 2:
9878 self.op.snode = ial.result[1]
9880 def BuildHooksEnv(self):
9883 This runs on master, primary and secondary nodes of the instance.
9887 "ADD_MODE": self.op.mode,
9889 if self.op.mode == constants.INSTANCE_IMPORT:
9890 env["SRC_NODE"] = self.op.src_node
9891 env["SRC_PATH"] = self.op.src_path
9892 env["SRC_IMAGES"] = self.src_images
9894 env.update(_BuildInstanceHookEnv(
9895 name=self.op.instance_name,
9896 primary_node=self.op.pnode,
9897 secondary_nodes=self.secondaries,
9898 status=self.op.start,
9899 os_type=self.op.os_type,
9900 minmem=self.be_full[constants.BE_MINMEM],
9901 maxmem=self.be_full[constants.BE_MAXMEM],
9902 vcpus=self.be_full[constants.BE_VCPUS],
9903 nics=_NICListToTuple(self, self.nics),
9904 disk_template=self.op.disk_template,
9905 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9906 for d in self.disks],
9909 hypervisor_name=self.op.hypervisor,
9915 def BuildHooksNodes(self):
9916 """Build hooks nodes.
9919 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9922 def _ReadExportInfo(self):
9923 """Reads the export information from disk.
9925 It will override the opcode source node and path with the actual
9926 information, if these two were not specified before.
9928 @return: the export information
9931 assert self.op.mode == constants.INSTANCE_IMPORT
9933 src_node = self.op.src_node
9934 src_path = self.op.src_path
9936 if src_node is None:
9937 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9938 exp_list = self.rpc.call_export_list(locked_nodes)
9940 for node in exp_list:
9941 if exp_list[node].fail_msg:
9943 if src_path in exp_list[node].payload:
9945 self.op.src_node = src_node = node
9946 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9950 raise errors.OpPrereqError("No export found for relative path %s" %
9951 src_path, errors.ECODE_INVAL)
9953 _CheckNodeOnline(self, src_node)
9954 result = self.rpc.call_export_info(src_node, src_path)
9955 result.Raise("No export or invalid export found in dir %s" % src_path)
9957 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9958 if not export_info.has_section(constants.INISECT_EXP):
9959 raise errors.ProgrammerError("Corrupted export config",
9960 errors.ECODE_ENVIRON)
9962 ei_version = export_info.get(constants.INISECT_EXP, "version")
9963 if (int(ei_version) != constants.EXPORT_VERSION):
9964 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9965 (ei_version, constants.EXPORT_VERSION),
9966 errors.ECODE_ENVIRON)
9969 def _ReadExportParams(self, einfo):
9970 """Use export parameters as defaults.
9972 In case the opcode doesn't override some instance parameters, try to
9973 take them from the export information, if
9977 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9979 if self.op.disk_template is None:
9980 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9981 self.op.disk_template = einfo.get(constants.INISECT_INS,
9983 if self.op.disk_template not in constants.DISK_TEMPLATES:
9984 raise errors.OpPrereqError("Disk template specified in configuration"
9985 " file is not one of the allowed values:"
9987 " ".join(constants.DISK_TEMPLATES),
9990 raise errors.OpPrereqError("No disk template specified and the export"
9991 " is missing the disk_template information",
9994 if not self.op.disks:
9996 # TODO: import the disk iv_name too
9997 for idx in range(constants.MAX_DISKS):
9998 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9999 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10000 disks.append({constants.IDISK_SIZE: disk_sz})
10001 self.op.disks = disks
10002 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10003 raise errors.OpPrereqError("No disk info specified and the export"
10004 " is missing the disk information",
10005 errors.ECODE_INVAL)
10007 if not self.op.nics:
10009 for idx in range(constants.MAX_NICS):
10010 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10012 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10013 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10018 self.op.nics = nics
10020 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10021 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10023 if (self.op.hypervisor is None and
10024 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10025 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10027 if einfo.has_section(constants.INISECT_HYP):
10028 # use the export parameters but do not override the ones
10029 # specified by the user
10030 for name, value in einfo.items(constants.INISECT_HYP):
10031 if name not in self.op.hvparams:
10032 self.op.hvparams[name] = value
10034 if einfo.has_section(constants.INISECT_BEP):
10035 # use the parameters, without overriding
10036 for name, value in einfo.items(constants.INISECT_BEP):
10037 if name not in self.op.beparams:
10038 self.op.beparams[name] = value
10039 # Compatibility for the old "memory" be param
10040 if name == constants.BE_MEMORY:
10041 if constants.BE_MAXMEM not in self.op.beparams:
10042 self.op.beparams[constants.BE_MAXMEM] = value
10043 if constants.BE_MINMEM not in self.op.beparams:
10044 self.op.beparams[constants.BE_MINMEM] = value
10046 # try to read the parameters old style, from the main section
10047 for name in constants.BES_PARAMETERS:
10048 if (name not in self.op.beparams and
10049 einfo.has_option(constants.INISECT_INS, name)):
10050 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10052 if einfo.has_section(constants.INISECT_OSP):
10053 # use the parameters, without overriding
10054 for name, value in einfo.items(constants.INISECT_OSP):
10055 if name not in self.op.osparams:
10056 self.op.osparams[name] = value
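# Illustrative fragment (hypothetical, not from this module) of an export
# file as consumed by _ReadExportParams; section names follow the
# INISECT_* constants and option names the "disk%d_size"/"nic%d_*"
# patterns used above:
#
#   [export]
#   version = 0
#   os = debootstrap+default
#
#   [instance]
#   disk_template = plain
#   disk0_size = 10240
#   nic0_mac = aa:00:00:12:34:56
#   nic0_ip = 192.0.2.10
#   tags = web production
#
# Values found here are only defaults; anything given explicitly in the
# opcode takes precedence, as the checks above show.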
10058 def _RevertToDefaults(self, cluster):
10059 """Revert the instance parameters to the default values.
10063 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10064 for name in self.op.hvparams.keys():
10065 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10066 del self.op.hvparams[name]
10068 be_defs = cluster.SimpleFillBE({})
10069 for name in self.op.beparams.keys():
10070 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10071 del self.op.beparams[name]
10073 nic_defs = cluster.SimpleFillNIC({})
10074 for nic in self.op.nics:
10075 for name in constants.NICS_PARAMETERS:
10076 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10079 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10080 for name in self.op.osparams.keys():
10081 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10082 del self.op.osparams[name]
10084 def _CalculateFileStorageDir(self):
10085 """Calculate final instance file storage dir.
10088 # file storage dir calculation/check
10089 self.instance_file_storage_dir = None
10090 if self.op.disk_template in constants.DTS_FILEBASED:
10091 # build the full file storage dir path
10094 if self.op.disk_template == constants.DT_SHARED_FILE:
10095 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10097 get_fsd_fn = self.cfg.GetFileStorageDir
10099 cfg_storagedir = get_fsd_fn()
10100 if not cfg_storagedir:
10101 raise errors.OpPrereqError("Cluster file storage dir not defined",
10102 errors.ECODE_STATE)
10103 joinargs.append(cfg_storagedir)
10105 if self.op.file_storage_dir is not None:
10106 joinargs.append(self.op.file_storage_dir)
10108 joinargs.append(self.op.instance_name)
10110 # pylint: disable=W0142
10111 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
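# Illustrative path construction (hypothetical values): with a cluster
# file storage directory of /srv/ganeti/file-storage, an opcode
# file_storage_dir of "web" and an instance named "inst1.example.com",
# the joinargs logic above yields
#   /srv/ganeti/file-storage/web/inst1.example.com
# When no per-instance directory is given, the instance name is appended
# directly to the cluster-wide directory.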
10113 def CheckPrereq(self): # pylint: disable=R0914
10114 """Check prerequisites.
10117 self._CalculateFileStorageDir()
10119 if self.op.mode == constants.INSTANCE_IMPORT:
10120 export_info = self._ReadExportInfo()
10121 self._ReadExportParams(export_info)
10122 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10124 self._old_instance_name = None
10126 if (not self.cfg.GetVGName() and
10127 self.op.disk_template not in constants.DTS_NOT_LVM):
10128 raise errors.OpPrereqError("Cluster does not support lvm-based"
10129 " instances", errors.ECODE_STATE)
10131 if (self.op.hypervisor is None or
10132 self.op.hypervisor == constants.VALUE_AUTO):
10133 self.op.hypervisor = self.cfg.GetHypervisorType()
10135 cluster = self.cfg.GetClusterInfo()
10136 enabled_hvs = cluster.enabled_hypervisors
10137 if self.op.hypervisor not in enabled_hvs:
10138 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10140 (self.op.hypervisor, ",".join(enabled_hvs)),
10141 errors.ECODE_STATE)
10143 # Check tag validity
10144 for tag in self.op.tags:
10145 objects.TaggableObject.ValidateTag(tag)
10147 # check hypervisor parameter syntax (locally)
10148 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10149 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10151 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10152 hv_type.CheckParameterSyntax(filled_hvp)
10153 self.hv_full = filled_hvp
10154 # check that we don't specify global parameters on an instance
10155 _CheckGlobalHvParams(self.op.hvparams)
10157 # fill and remember the beparams dict
10158 self.be_full = _ComputeFullBeParams(self.op, cluster)
10160 # build os parameters
10161 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10163 # now that hvp/bep are in final format, let's reset to defaults,
10165 if self.op.identify_defaults:
10166 self._RevertToDefaults(cluster)
10169 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10172 # disk checks/pre-build
10173 default_vg = self.cfg.GetVGName()
10174 self.disks = _ComputeDisks(self.op, default_vg)
10176 if self.op.mode == constants.INSTANCE_IMPORT:
10178 for idx in range(len(self.disks)):
10179 option = "disk%d_dump" % idx
10180 if export_info.has_option(constants.INISECT_INS, option):
10181 # FIXME: are the old OSes, disk sizes, etc. useful?
10182 export_name = export_info.get(constants.INISECT_INS, option)
10183 image = utils.PathJoin(self.op.src_path, export_name)
10184 disk_images.append(image)
10186 disk_images.append(False)
10188 self.src_images = disk_images
10190 if self.op.instance_name == self._old_instance_name:
10191 for idx, nic in enumerate(self.nics):
10192 if nic.mac == constants.VALUE_AUTO:
10193 nic_mac_ini = "nic%d_mac" % idx
10194 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10196 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10198 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10199 if self.op.ip_check:
10200 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10201 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10202 (self.check_ip, self.op.instance_name),
10203 errors.ECODE_NOTUNIQUE)
10205 #### mac address generation
10206 # By generating here the mac address both the allocator and the hooks get
10207 # the real final mac address rather than the 'auto' or 'generate' value.
10208 # There is a race condition between the generation and the instance object
10209 # creation, which means that we know the mac is valid now, but we're not
10210 # sure it will be when we actually add the instance. If things go bad
10211 # adding the instance will abort because of a duplicate mac, and the
10212 # creation job will fail.
10213 for nic in self.nics:
10214 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10215 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10219 if self.op.iallocator is not None:
10220 self._RunAllocator()
10222 # Release all unneeded node locks
10223 _ReleaseLocks(self, locking.LEVEL_NODE,
10224 keep=filter(None, [self.op.pnode, self.op.snode,
10225 self.op.src_node]))
10226 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10227 keep=filter(None, [self.op.pnode, self.op.snode,
10228 self.op.src_node]))
10230 #### node related checks
10232 # check primary node
10233 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10234 assert self.pnode is not None, \
10235 "Cannot retrieve locked node %s" % self.op.pnode
10237 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10238 pnode.name, errors.ECODE_STATE)
10240 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10241 pnode.name, errors.ECODE_STATE)
10242 if not pnode.vm_capable:
10243 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10244 " '%s'" % pnode.name, errors.ECODE_STATE)
10246 self.secondaries = []
10248 # Fill in any IPs from IP pools. This must happen here, because we need to
10249 # know the nic's primary node, as specified by the iallocator
10250 for idx, nic in enumerate(self.nics):
10252 if net is not None:
10253 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10254 if netparams is None:
10255 raise errors.OpPrereqError("No netparams found for network"
10256 " %s. Propably not connected to"
10257 " node's %s nodegroup" %
10258 (net, self.pnode.name),
10259 errors.ECODE_INVAL)
10260 self.LogInfo("NIC/%d inherits netparams %s" %
10261 (idx, netparams.values()))
10262 nic.nicparams = dict(netparams)
10263 if nic.ip is not None:
10264 if nic.ip.lower() == constants.NIC_IP_POOL:
10266 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10267 except errors.ReservationError:
10268 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10269 " from the address pool" % idx,
10270 errors.ECODE_STATE)
10271 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10274 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10275 except errors.ReservationError:
10276 raise errors.OpPrereqError("IP address %s already in use"
10277 " or does not belong to network %s" %
10279 errors.ECODE_NOTUNIQUE)
10281 # net is None, ip None or given
10282 if self.op.conflicts_check:
10283 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10285 # mirror node verification
10286 if self.op.disk_template in constants.DTS_INT_MIRROR:
10287 if self.op.snode == pnode.name:
10288 raise errors.OpPrereqError("The secondary node cannot be the"
10289 " primary node", errors.ECODE_INVAL)
10290 _CheckNodeOnline(self, self.op.snode)
10291 _CheckNodeNotDrained(self, self.op.snode)
10292 _CheckNodeVmCapable(self, self.op.snode)
10293 self.secondaries.append(self.op.snode)
10295 snode = self.cfg.GetNodeInfo(self.op.snode)
10296 if pnode.group != snode.group:
10297 self.LogWarning("The primary and secondary nodes are in two"
10298 " different node groups; the disk parameters"
10299 " from the first disk's node group will be"
10302 nodenames = [pnode.name] + self.secondaries
10304 # Verify instance specs
10305 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10307 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10308 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10309 constants.ISPEC_DISK_COUNT: len(self.disks),
10310 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10311 constants.ISPEC_NIC_COUNT: len(self.nics),
10312 constants.ISPEC_SPINDLE_USE: spindle_use,
10315 group_info = self.cfg.GetNodeGroup(pnode.group)
10316 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10317 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10318 if not self.op.ignore_ipolicy and res:
10319 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10320 (pnode.group, group_info.name, utils.CommaJoin(res)))
10321 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10323 if not self.adopt_disks:
10324 if self.op.disk_template == constants.DT_RBD:
10325 # _CheckRADOSFreeSpace() is just a placeholder.
10326 # Any function that checks prerequisites can be placed here.
10327 # Check if there is enough space on the RADOS cluster.
10328 _CheckRADOSFreeSpace()
10330 # Check lv size requirements, if not adopting
10331 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10332 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10334 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10335 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10336 disk[constants.IDISK_ADOPT])
10337 for disk in self.disks])
10338 if len(all_lvs) != len(self.disks):
10339 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10340 errors.ECODE_INVAL)
10341 for lv_name in all_lvs:
10343 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
10344 # to ReserveLV use the same syntax
10345 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10346 except errors.ReservationError:
10347 raise errors.OpPrereqError("LV named %s used by another instance" %
10348 lv_name, errors.ECODE_NOTUNIQUE)
10350 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10351 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10353 node_lvs = self.rpc.call_lv_list([pnode.name],
10354 vg_names.payload.keys())[pnode.name]
10355 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10356 node_lvs = node_lvs.payload
10358 delta = all_lvs.difference(node_lvs.keys())
10360 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10361 utils.CommaJoin(delta),
10362 errors.ECODE_INVAL)
10363 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10365 raise errors.OpPrereqError("Online logical volumes found, cannot"
10366 " adopt: %s" % utils.CommaJoin(online_lvs),
10367 errors.ECODE_STATE)
10368 # update the size of disk based on what is found
10369 for dsk in self.disks:
10370 dsk[constants.IDISK_SIZE] = \
10371 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10372 dsk[constants.IDISK_ADOPT])][0]))
10374 elif self.op.disk_template == constants.DT_BLOCK:
10375 # Normalize and de-duplicate device paths
10376 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10377 for disk in self.disks])
10378 if len(all_disks) != len(self.disks):
10379 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10380 errors.ECODE_INVAL)
10381 baddisks = [d for d in all_disks
10382 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10384 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10385 " cannot be adopted" %
10386 (", ".join(baddisks),
10387 constants.ADOPTABLE_BLOCKDEV_ROOT),
10388 errors.ECODE_INVAL)
10390 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10391 list(all_disks))[pnode.name]
10392 node_disks.Raise("Cannot get block device information from node %s" %
10394 node_disks = node_disks.payload
10395 delta = all_disks.difference(node_disks.keys())
10397 raise errors.OpPrereqError("Missing block device(s): %s" %
10398 utils.CommaJoin(delta),
10399 errors.ECODE_INVAL)
10400 for dsk in self.disks:
10401 dsk[constants.IDISK_SIZE] = \
10402 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10404 # Verify instance specs
10405 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10407 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10408 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10409 constants.ISPEC_DISK_COUNT: len(self.disks),
10410 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10411 for disk in self.disks],
10412 constants.ISPEC_NIC_COUNT: len(self.nics),
10413 constants.ISPEC_SPINDLE_USE: spindle_use,
10416 group_info = self.cfg.GetNodeGroup(pnode.group)
10417 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10418 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10419 if not self.op.ignore_ipolicy and res:
10420 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10421 " policy: %s") % (pnode.group,
10422 utils.CommaJoin(res)),
10423 errors.ECODE_INVAL)
10425 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10427 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10428 # check OS parameters (remotely)
10429 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10431 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10433 # memory check on primary node
10434 #TODO(dynmem): use MINMEM for checking
10436 _CheckNodeFreeMemory(self, self.pnode.name,
10437 "creating instance %s" % self.op.instance_name,
10438 self.be_full[constants.BE_MAXMEM],
10439 self.op.hypervisor)
10441 self.dry_run_result = list(nodenames)
10443 def Exec(self, feedback_fn):
10444 """Create and add the instance to the cluster.
10447 instance = self.op.instance_name
10448 pnode_name = self.pnode.name
10450 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10451 self.owned_locks(locking.LEVEL_NODE)), \
10452 "Node locks differ from node resource locks"
10454 ht_kind = self.op.hypervisor
10455 if ht_kind in constants.HTS_REQ_PORT:
10456 network_port = self.cfg.AllocatePort()
10458 network_port = None
10460 # This is ugly but we have a chicken-and-egg problem here
10461 # We can only take the group disk parameters, as the instance
10462 # has no disks yet (we are generating them right here).
10463 node = self.cfg.GetNodeInfo(pnode_name)
10464 nodegroup = self.cfg.GetNodeGroup(node.group)
10465 disks = _GenerateDiskTemplate(self,
10466 self.op.disk_template,
10467 instance, pnode_name,
10470 self.instance_file_storage_dir,
10471 self.op.file_driver,
10474 self.cfg.GetGroupDiskParams(nodegroup))
10476 iobj = objects.Instance(name=instance, os=self.op.os_type,
10477 primary_node=pnode_name,
10478 nics=self.nics, disks=disks,
10479 disk_template=self.op.disk_template,
10480 admin_state=constants.ADMINST_DOWN,
10481 network_port=network_port,
10482 beparams=self.op.beparams,
10483 hvparams=self.op.hvparams,
10484 hypervisor=self.op.hypervisor,
10485 osparams=self.op.osparams,
10489 for tag in self.op.tags:
10492 if self.adopt_disks:
10493 if self.op.disk_template == constants.DT_PLAIN:
10494 # rename LVs to the newly-generated names; we need to construct
10495 # 'fake' LV disks with the old data, plus the new unique_id
10496 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10498 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10499 rename_to.append(t_dsk.logical_id)
10500 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10501 self.cfg.SetDiskID(t_dsk, pnode_name)
10502 result = self.rpc.call_blockdev_rename(pnode_name,
10503 zip(tmp_disks, rename_to))
10504 result.Raise("Failed to rename adoped LVs")
10506 feedback_fn("* creating instance disks...")
10508 _CreateDisks(self, iobj)
10509 except errors.OpExecError:
10510 self.LogWarning("Device creation failed, reverting...")
10512 _RemoveDisks(self, iobj)
10514 self.cfg.ReleaseDRBDMinors(instance)
10517 feedback_fn("adding instance %s to cluster config" % instance)
10519 self.cfg.AddInstance(iobj, self.proc.GetECId())
10521 # Declare that we don't want to remove the instance lock anymore, as we've
10522 # added the instance to the config
10523 del self.remove_locks[locking.LEVEL_INSTANCE]
10525 if self.op.mode == constants.INSTANCE_IMPORT:
10526 # Release unused nodes
10527 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10529 # Release all nodes
10530 _ReleaseLocks(self, locking.LEVEL_NODE)
10533 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10534 feedback_fn("* wiping instance disks...")
10536 _WipeDisks(self, iobj)
10537 except errors.OpExecError, err:
10538 logging.exception("Wiping disks failed")
10539 self.LogWarning("Wiping instance disks failed (%s)", err)
10543 # Something is already wrong with the disks, don't do anything else
10545 elif self.op.wait_for_sync:
10546 disk_abort = not _WaitForSync(self, iobj)
10547 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10548 # make sure the disks are not degraded (still sync-ing is ok)
10549 feedback_fn("* checking mirrors status")
10550 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10555 _RemoveDisks(self, iobj)
10556 self.cfg.RemoveInstance(iobj.name)
10557 # Make sure the instance lock gets removed
10558 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10559 raise errors.OpExecError("There are some degraded disks for"
10562 # Release all node resource locks
10563 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10565 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10566 # we need to set the disks' ID to the primary node, since the
10567 # preceding code might or might not have done it, depending on
10568 # disk template and other options
10569 for disk in iobj.disks:
10570 self.cfg.SetDiskID(disk, pnode_name)
10571 if self.op.mode == constants.INSTANCE_CREATE:
10572 if not self.op.no_install:
10573 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10574 not self.op.wait_for_sync)
10576 feedback_fn("* pausing disk sync to install instance OS")
10577 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10580 for idx, success in enumerate(result.payload):
10582 logging.warn("pause-sync of instance %s for disk %d failed",
10585 feedback_fn("* running the instance OS create scripts...")
10586 # FIXME: pass debug option from opcode to backend
10588 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10589 self.op.debug_level)
10591 feedback_fn("* resuming disk sync")
10592 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10595 for idx, success in enumerate(result.payload):
10597 logging.warn("resume-sync of instance %s for disk %d failed",
10600 os_add_result.Raise("Could not add os for instance %s"
10601 " on node %s" % (instance, pnode_name))
10604 if self.op.mode == constants.INSTANCE_IMPORT:
10605 feedback_fn("* running the instance OS import scripts...")
10609 for idx, image in enumerate(self.src_images):
10613 # FIXME: pass debug option from opcode to backend
10614 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10615 constants.IEIO_FILE, (image, ),
10616 constants.IEIO_SCRIPT,
10617 (iobj.disks[idx], idx),
10619 transfers.append(dt)
10622 masterd.instance.TransferInstanceData(self, feedback_fn,
10623 self.op.src_node, pnode_name,
10624 self.pnode.secondary_ip,
10626 if not compat.all(import_result):
10627 self.LogWarning("Some disks for instance %s on node %s were not"
10628 " imported successfully" % (instance, pnode_name))
10630 rename_from = self._old_instance_name
10632 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10633 feedback_fn("* preparing remote import...")
10634 # The source cluster will stop the instance before attempting to make
10635 # a connection. In some cases stopping an instance can take a long
10636 # time, hence the shutdown timeout is added to the connection
10638 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10639 self.op.source_shutdown_timeout)
10640 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10642 assert iobj.primary_node == self.pnode.name
10644 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10645 self.source_x509_ca,
10646 self._cds, timeouts)
10647 if not compat.all(disk_results):
10648 # TODO: Should the instance still be started, even if some disks
10649 # failed to import (valid for local imports, too)?
10650 self.LogWarning("Some disks for instance %s on node %s were not"
10651 " imported successfully" % (instance, pnode_name))
10653 rename_from = self.source_instance_name
10656 # also checked in the prereq part
10657 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10660 # Run rename script on newly imported instance
10661 assert iobj.name == instance
10662 feedback_fn("Running rename script for %s" % instance)
10663 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10665 self.op.debug_level)
10666 if result.fail_msg:
10667 self.LogWarning("Failed to run rename script for %s on node"
10668 " %s: %s" % (instance, pnode_name, result.fail_msg))
10670 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10673 iobj.admin_state = constants.ADMINST_UP
10674 self.cfg.Update(iobj, feedback_fn)
10675 logging.info("Starting instance %s on node %s", instance, pnode_name)
10676 feedback_fn("* starting instance...")
10677 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10679 result.Raise("Could not start instance")
10681 return list(iobj.all_nodes)
10684 class LUInstanceMultiAlloc(NoHooksLU):
10685 """Allocates multiple instances at the same time.
10690 def CheckArguments(self):
10691 """Check arguments.
10695 for inst in self.op.instances:
10696 if inst.iallocator is not None:
10697 raise errors.OpPrereqError("iallocator is not allowed to be set on"
10698 " instance objects", errors.ECODE_INVAL)
10699 nodes.append(bool(inst.pnode))
10700 if inst.disk_template in constants.DTS_INT_MIRROR:
10701 nodes.append(bool(inst.snode))
10703 has_nodes = compat.any(nodes)
10704 if compat.all(nodes) ^ has_nodes:
10705 raise errors.OpPrereqError("There are instance objects providing"
10706 " pnode/snode while others do not",
10707 errors.ECODE_INVAL)
10709 if self.op.iallocator is None:
10710 default_iallocator = self.cfg.GetDefaultIAllocator()
10711 if default_iallocator and has_nodes:
10712 self.op.iallocator = default_iallocator
10714 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10715 " given and no cluster-wide default"
10716 " iallocator found; please specify either"
10717 " an iallocator or nodes on the instances"
10718 " or set a cluster-wide default iallocator",
10719 errors.ECODE_INVAL)
10721 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10723 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10724 utils.CommaJoin(dups), errors.ECODE_INVAL)
10726 def ExpandNames(self):
10727 """Calculate the locks.
10730 self.share_locks = _ShareAll()
10731 self.needed_locks = {}
10733 if self.op.iallocator:
10734 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10735 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10738 for inst in self.op.instances:
10739 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10740 nodeslist.append(inst.pnode)
10741 if inst.snode is not None:
10742 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10743 nodeslist.append(inst.snode)
10745 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10746 # Lock resources of instance's primary and secondary nodes (copy to
10747 # prevent accidental modification)
10748 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10750 def CheckPrereq(self):
10751 """Check prerequisite.
10754 cluster = self.cfg.GetClusterInfo()
10755 default_vg = self.cfg.GetVGName()
10756 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10757 _ComputeNics(op, cluster, None,
10758 self.cfg, self.proc),
10759 _ComputeFullBeParams(op, cluster))
10760 for op in self.op.instances]
10761 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10762 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10764 ial.Run(self.op.iallocator)
10766 if not ial.success:
10767 raise errors.OpPrereqError("Can't compute nodes using"
10768 " iallocator '%s': %s" %
10769 (self.op.iallocator, ial.info),
10770 errors.ECODE_NORES)
10772 self.ia_result = ial.result
10774 if self.op.dry_run:
10775 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10776 constants.JOB_IDS_KEY: [],
10779 def _ConstructPartialResult(self):
10780 """Contructs the partial result.
10783 (allocatable, failed) = self.ia_result
10785 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10786 map(compat.fst, allocatable),
10787 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10790 def Exec(self, feedback_fn):
10791 """Executes the opcode.
10794 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10795 (allocatable, failed) = self.ia_result
10798 for (name, nodes) in allocatable:
10799 op = op2inst.pop(name)
10802 (op.pnode, op.snode) = nodes
10804 (op.pnode,) = nodes
10808 missing = set(op2inst.keys()) - set(failed)
10809 assert not missing, \
10810 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10812 return ResultWithJobs(jobs, **self._ConstructPartialResult())
10815 def _CheckRADOSFreeSpace():
10816 """Compute disk size requirements inside the RADOS cluster.
10819 # For the RADOS cluster we assume there is always enough space.
10823 class LUInstanceConsole(NoHooksLU):
10824 """Connect to an instance's console.
10826 This is somewhat special in that it returns the command line that
10827 you need to run on the master node in order to connect to the
10833 def ExpandNames(self):
10834 self.share_locks = _ShareAll()
10835 self._ExpandAndLockInstance()
10837 def CheckPrereq(self):
10838 """Check prerequisites.
10840 This checks that the instance is in the cluster.
10843 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10844 assert self.instance is not None, \
10845 "Cannot retrieve locked instance %s" % self.op.instance_name
10846 _CheckNodeOnline(self, self.instance.primary_node)
10848 def Exec(self, feedback_fn):
10849 """Connect to the console of an instance
10852 instance = self.instance
10853 node = instance.primary_node
10855 node_insts = self.rpc.call_instance_list([node],
10856 [instance.hypervisor])[node]
10857 node_insts.Raise("Can't get node information from %s" % node)
10859 if instance.name not in node_insts.payload:
10860 if instance.admin_state == constants.ADMINST_UP:
10861 state = constants.INSTST_ERRORDOWN
10862 elif instance.admin_state == constants.ADMINST_DOWN:
10863 state = constants.INSTST_ADMINDOWN
10865 state = constants.INSTST_ADMINOFFLINE
10866 raise errors.OpExecError("Instance %s is not running (state %s)" %
10867 (instance.name, state))
10869 logging.debug("Connecting to console of %s on %s", instance.name, node)
10871 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10874 def _GetInstanceConsole(cluster, instance):
10875 """Returns console information for an instance.
10877 @type cluster: L{objects.Cluster}
10878 @type instance: L{objects.Instance}
10882 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10883 # beparams and hvparams are passed separately, to avoid editing the
10884 # instance and then saving the defaults in the instance itself.
10885 hvparams = cluster.FillHV(instance)
10886 beparams = cluster.FillBE(instance)
10887 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10889 assert console.instance == instance.name
10890 assert console.Validate()
10892 return console.ToDict()
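# Hedged usage sketch (illustrative values only): the dictionary returned
# by _GetInstanceConsole is what LUInstanceConsole.Exec hands back to the
# client, e.g. something along the lines of
#   {"instance": "inst1.example.com", "kind": "ssh",
#    "host": "node1.example.com", "command": ["xl", "console", "inst1"]}
# The exact keys and values depend on the hypervisor's console object and
# are only sketched here.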
10895 class LUInstanceReplaceDisks(LogicalUnit):
10896 """Replace the disks of an instance.
10899 HPATH = "mirrors-replace"
10900 HTYPE = constants.HTYPE_INSTANCE
10903 def CheckArguments(self):
10904 """Check arguments.
10907 remote_node = self.op.remote_node
10908 ialloc = self.op.iallocator
10909 if self.op.mode == constants.REPLACE_DISK_CHG:
10910 if remote_node is None and ialloc is None:
10911 raise errors.OpPrereqError("When changing the secondary either an"
10912 " iallocator script must be used or the"
10913 " new node given", errors.ECODE_INVAL)
10915 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10917 elif remote_node is not None or ialloc is not None:
10918 # Not replacing the secondary
10919 raise errors.OpPrereqError("The iallocator and new node options can"
10920 " only be used when changing the"
10921 " secondary node", errors.ECODE_INVAL)
10923 def ExpandNames(self):
10924 self._ExpandAndLockInstance()
10926 assert locking.LEVEL_NODE not in self.needed_locks
10927 assert locking.LEVEL_NODE_RES not in self.needed_locks
10928 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10930 assert self.op.iallocator is None or self.op.remote_node is None, \
10931 "Conflicting options"
10933 if self.op.remote_node is not None:
10934 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10936 # Warning: do not remove the locking of the new secondary here
10937 # unless DRBD8.AddChildren is changed to work in parallel;
10938 # currently it doesn't since parallel invocations of
10939 # FindUnusedMinor will conflict
10940 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10941 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10943 self.needed_locks[locking.LEVEL_NODE] = []
10944 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10946 if self.op.iallocator is not None:
10947 # iallocator will select a new node in the same group
10948 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10950 self.needed_locks[locking.LEVEL_NODE_RES] = []
10952 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10953 self.op.iallocator, self.op.remote_node,
10954 self.op.disks, self.op.early_release,
10955 self.op.ignore_ipolicy)
10957 self.tasklets = [self.replacer]
10959 def DeclareLocks(self, level):
10960 if level == locking.LEVEL_NODEGROUP:
10961 assert self.op.remote_node is None
10962 assert self.op.iallocator is not None
10963 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10965 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10966 # Lock all groups used by instance optimistically; this requires going
10967 # via the node before it's locked, requiring verification later on
10968 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10969 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10971 elif level == locking.LEVEL_NODE:
10972 if self.op.iallocator is not None:
10973 assert self.op.remote_node is None
10974 assert not self.needed_locks[locking.LEVEL_NODE]
10976 # Lock member nodes of all locked groups
10977 self.needed_locks[locking.LEVEL_NODE] = \
10979 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10980 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10982 self._LockInstancesNodes()
10983 elif level == locking.LEVEL_NODE_RES:
10985 self.needed_locks[locking.LEVEL_NODE_RES] = \
10986 self.needed_locks[locking.LEVEL_NODE]
10988 def BuildHooksEnv(self):
10989 """Build hooks env.
10991 This runs on the master, the primary and all the secondaries.
10994 instance = self.replacer.instance
10996 "MODE": self.op.mode,
10997 "NEW_SECONDARY": self.op.remote_node,
10998 "OLD_SECONDARY": instance.secondary_nodes[0],
11000 env.update(_BuildInstanceHookEnvByObject(self, instance))
11003 def BuildHooksNodes(self):
11004 """Build hooks nodes.
11007 instance = self.replacer.instance
11009 self.cfg.GetMasterNode(),
11010 instance.primary_node,
11012 if self.op.remote_node is not None:
11013 nl.append(self.op.remote_node)
11016 def CheckPrereq(self):
11017 """Check prerequisites.
11020 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11021 self.op.iallocator is None)
11023 # Verify if node group locks are still correct
11024 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11026 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11028 return LogicalUnit.CheckPrereq(self)
11031 class TLReplaceDisks(Tasklet):
11032 """Replaces disks for an instance.
11034 Note: Locking is not within the scope of this class.
11037 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11038 disks, early_release, ignore_ipolicy):
11039 """Initializes this class.
11042 Tasklet.__init__(self, lu)
11045 self.instance_name = instance_name
11047 self.iallocator_name = iallocator_name
11048 self.remote_node = remote_node
11050 self.early_release = early_release
11051 self.ignore_ipolicy = ignore_ipolicy
11054 self.instance = None
11055 self.new_node = None
11056 self.target_node = None
11057 self.other_node = None
11058 self.remote_node_info = None
11059 self.node_secondary_ip = None
11062 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11063 """Compute a new secondary node using an IAllocator.
11066 req = iallocator.IAReqRelocate(name=instance_name,
11067 relocate_from=list(relocate_from))
11068 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11070 ial.Run(iallocator_name)
11072 if not ial.success:
11073 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11074 " %s" % (iallocator_name, ial.info),
11075 errors.ECODE_NORES)
11077 remote_node_name = ial.result[0]
11079 lu.LogInfo("Selected new secondary for instance '%s': %s",
11080 instance_name, remote_node_name)
11082 return remote_node_name
11084 def _FindFaultyDisks(self, node_name):
11085 """Wrapper for L{_FindFaultyInstanceDisks}.
11088 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11091 def _CheckDisksActivated(self, instance):
11092 """Checks if the instance disks are activated.
11094 @param instance: The instance to check disks
11095 @return: True if they are activated, False otherwise
11098 nodes = instance.all_nodes
11100 for idx, dev in enumerate(instance.disks):
11102 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11103 self.cfg.SetDiskID(dev, node)
11105 result = _BlockdevFind(self, node, dev, instance)
11109 elif result.fail_msg or not result.payload:
11114 def CheckPrereq(self):
11115 """Check prerequisites.
11117 This checks that the instance is in the cluster.
11120 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11121 assert instance is not None, \
11122 "Cannot retrieve locked instance %s" % self.instance_name
11124 if instance.disk_template != constants.DT_DRBD8:
11125 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11126 " instances", errors.ECODE_INVAL)
11128 if len(instance.secondary_nodes) != 1:
11129 raise errors.OpPrereqError("The instance has a strange layout,"
11130 " expected one secondary but found %d" %
11131 len(instance.secondary_nodes),
11132 errors.ECODE_FAULT)
11134 instance = self.instance
11135 secondary_node = instance.secondary_nodes[0]
11137 if self.iallocator_name is None:
11138 remote_node = self.remote_node
11140 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11141 instance.name, instance.secondary_nodes)
11143 if remote_node is None:
11144 self.remote_node_info = None
11146 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11147 "Remote node '%s' is not locked" % remote_node
11149 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11150 assert self.remote_node_info is not None, \
11151 "Cannot retrieve locked node %s" % remote_node
11153 if remote_node == self.instance.primary_node:
11154 raise errors.OpPrereqError("The specified node is the primary node of"
11155 " the instance", errors.ECODE_INVAL)
11157 if remote_node == secondary_node:
11158 raise errors.OpPrereqError("The specified node is already the"
11159 " secondary node of the instance",
11160 errors.ECODE_INVAL)
11162 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11163 constants.REPLACE_DISK_CHG):
11164 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11165 errors.ECODE_INVAL)
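# The mode handling below assigns the node roles used by the rest of the
# tasklet. A rough summary, derived from the assignments that follow:
#   REPLACE_DISK_AUTO: target/other are picked from whichever side reports
#                      faulty disks (primary or secondary)
#   REPLACE_DISK_PRI:  target_node = primary,   other_node = secondary
#   REPLACE_DISK_SEC:  target_node = secondary, other_node = primary
#   REPLACE_DISK_CHG:  target_node = secondary, other_node = primary,
#                      new_node = the requested or iallocator-chosen node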
11167 if self.mode == constants.REPLACE_DISK_AUTO:
11168 if not self._CheckDisksActivated(instance):
11169 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11170 " first" % self.instance_name,
11171 errors.ECODE_STATE)
11172 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11173 faulty_secondary = self._FindFaultyDisks(secondary_node)
11175 if faulty_primary and faulty_secondary:
11176 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11177 " one node and can not be repaired"
11178 " automatically" % self.instance_name,
11179 errors.ECODE_STATE)
11182 self.disks = faulty_primary
11183 self.target_node = instance.primary_node
11184 self.other_node = secondary_node
11185 check_nodes = [self.target_node, self.other_node]
11186 elif faulty_secondary:
11187 self.disks = faulty_secondary
11188 self.target_node = secondary_node
11189 self.other_node = instance.primary_node
11190 check_nodes = [self.target_node, self.other_node]
11196 # Non-automatic modes
11197 if self.mode == constants.REPLACE_DISK_PRI:
11198 self.target_node = instance.primary_node
11199 self.other_node = secondary_node
11200 check_nodes = [self.target_node, self.other_node]
11202 elif self.mode == constants.REPLACE_DISK_SEC:
11203 self.target_node = secondary_node
11204 self.other_node = instance.primary_node
11205 check_nodes = [self.target_node, self.other_node]
11207 elif self.mode == constants.REPLACE_DISK_CHG:
11208 self.new_node = remote_node
11209 self.other_node = instance.primary_node
11210 self.target_node = secondary_node
11211 check_nodes = [self.new_node, self.other_node]
11213 _CheckNodeNotDrained(self.lu, remote_node)
11214 _CheckNodeVmCapable(self.lu, remote_node)
11216 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11217 assert old_node_info is not None
11218 if old_node_info.offline and not self.early_release:
11219 # doesn't make sense to delay the release
11220 self.early_release = True
11221 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11222 " early-release mode", secondary_node)
11225 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11228 # If not specified all disks should be replaced
11230 self.disks = range(len(self.instance.disks))
11232 # TODO: This is ugly, but right now we can't distinguish between internally
11233 # submitted opcodes and external ones. We should fix that.
11234 if self.remote_node_info:
11235 # We change the node, lets verify it still meets instance policy
11236 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11237 cluster = self.cfg.GetClusterInfo()
11238 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11240 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11241 ignore=self.ignore_ipolicy)
11243 for node in check_nodes:
11244 _CheckNodeOnline(self.lu, node)
11246 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11249 if node_name is not None)
11251 # Release unneeded node and node resource locks
11252 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11253 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11255 # Release any owned node group
11256 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11257 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11259 # Check whether disks are valid
11260 for disk_idx in self.disks:
11261 instance.FindDisk(disk_idx)
11263 # Get secondary node IP addresses
11264 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11265 in self.cfg.GetMultiNodeInfo(touched_nodes))
11267 def Exec(self, feedback_fn):
11268 """Execute disk replacement.
11270 This dispatches the disk replacement to the appropriate handler.
11274 # Verify owned locks before starting operation
11275 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11276 assert set(owned_nodes) == set(self.node_secondary_ip), \
11277 ("Incorrect node locks, owning %s, expected %s" %
11278 (owned_nodes, self.node_secondary_ip.keys()))
11279 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11280 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11282 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11283 assert list(owned_instances) == [self.instance_name], \
11284 "Instance '%s' not locked" % self.instance_name
11286 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11287 "Should not own any node group lock at this point"
11290 feedback_fn("No disks need replacement for instance '%s'" %
11291 self.instance.name)
11294 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11295 (utils.CommaJoin(self.disks), self.instance.name))
11296 feedback_fn("Current primary node: %s", self.instance.primary_node)
11297 feedback_fn("Current secondary node: %s",
11298 utils.CommaJoin(self.instance.secondary_nodes))
11300 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11302 # Activate the instance disks if we're replacing them on a down instance
11304 _StartInstanceDisks(self.lu, self.instance, True)
11307 # Should we replace the secondary node?
11308 if self.new_node is not None:
11309 fn = self._ExecDrbd8Secondary
11311 fn = self._ExecDrbd8DiskOnly
11313 result = fn(feedback_fn)
11315 # Deactivate the instance disks if we're replacing them on a
11318 _SafeShutdownInstanceDisks(self.lu, self.instance)
11320 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11323 # Verify owned locks
11324 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11325 nodes = frozenset(self.node_secondary_ip)
11326 assert ((self.early_release and not owned_nodes) or
11327 (not self.early_release and not (set(owned_nodes) - nodes))), \
11328 ("Not owning the correct locks, early_release=%s, owned=%r,"
11329 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11333 def _CheckVolumeGroup(self, nodes):
11334 self.lu.LogInfo("Checking volume groups")
11336 vgname = self.cfg.GetVGName()
11338 # Make sure volume group exists on all involved nodes
11339 results = self.rpc.call_vg_list(nodes)
11341 raise errors.OpExecError("Can't list volume groups on the nodes")
11344 res = results[node]
11345 res.Raise("Error checking node %s" % node)
11346 if vgname not in res.payload:
11347 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11350 def _CheckDisksExistence(self, nodes):
11351 # Check disk existence
11352 for idx, dev in enumerate(self.instance.disks):
11353 if idx not in self.disks:
11357 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11358 self.cfg.SetDiskID(dev, node)
11360 result = _BlockdevFind(self, node, dev, self.instance)
11362 msg = result.fail_msg
11363 if msg or not result.payload:
11365 msg = "disk not found"
11366 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11369 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11370 for idx, dev in enumerate(self.instance.disks):
11371 if idx not in self.disks:
11374 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11377 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11378 on_primary, ldisk=ldisk):
11379 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11380 " replace disks for instance %s" %
11381 (node_name, self.instance.name))
11383 def _CreateNewStorage(self, node_name):
11384 """Create new storage on the primary or secondary node.
11386 This is only used for same-node replaces, not for changing the
11387 secondary node, hence we don't want to modify the existing disk.
11392 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11393 for idx, dev in enumerate(disks):
11394 if idx not in self.disks:
11397 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11399 self.cfg.SetDiskID(dev, node_name)
11401 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11402 names = _GenerateUniqueNames(self.lu, lv_names)
11404 (data_disk, meta_disk) = dev.children
11405 vg_data = data_disk.logical_id[0]
11406 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11407 logical_id=(vg_data, names[0]),
11408 params=data_disk.params)
11409 vg_meta = meta_disk.logical_id[0]
11410 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11411 size=constants.DRBD_META_SIZE,
11412 logical_id=(vg_meta, names[1]),
11413 params=meta_disk.params)
11415 new_lvs = [lv_data, lv_meta]
11416 old_lvs = [child.Copy() for child in dev.children]
11417 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11419 # we pass force_create=True to force the LVM creation
11420 for new_lv in new_lvs:
11421 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11422 _GetInstanceInfoText(self.instance), False)
11426 def _CheckDevices(self, node_name, iv_names):
11427 for name, (dev, _, _) in iv_names.iteritems():
11428 self.cfg.SetDiskID(dev, node_name)
11430 result = _BlockdevFind(self, node_name, dev, self.instance)
11432 msg = result.fail_msg
11433 if msg or not result.payload:
11435 msg = "disk not found"
11436 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11439 if result.payload.is_degraded:
11440 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11442 def _RemoveOldStorage(self, node_name, iv_names):
11443 for name, (_, old_lvs, _) in iv_names.iteritems():
11444 self.lu.LogInfo("Remove logical volumes for %s" % name)
11447 self.cfg.SetDiskID(lv, node_name)
11449 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11451 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11452 hint="remove unused LVs manually")
11454 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11455 """Replace a disk on the primary or secondary for DRBD 8.
11457 The algorithm for replace is quite complicated:
11459 1. for each disk to be replaced:
11461 1. create new LVs on the target node with unique names
11462 1. detach old LVs from the drbd device
11463 1. rename old LVs to name_replaced.<time_t>
11464 1. rename new LVs to old LVs
11465 1. attach the new LVs (with the old names now) to the drbd device
11467 1. wait for sync across all devices
11469 1. for each modified disk:
11471 1. remove old LVs (which have the name name_replaced.<time_t>)
11473 Failures are not very well handled.
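# As an illustration only (instance name and values are hypothetical), an
# opcode that ends up in this same-node code path could look roughly like:
#
#   opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                  mode=constants.REPLACE_DISK_PRI,
#                                  disks=[0],
#                                  early_release=False)
#
# Requests with mode=constants.REPLACE_DISK_CHG (plus remote_node or an
# iallocator) are handled by _ExecDrbd8Secondary instead.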
11478 # Step: check device activation
11479 self.lu.LogStep(1, steps_total, "Check device existence")
11480 self._CheckDisksExistence([self.other_node, self.target_node])
11481 self._CheckVolumeGroup([self.target_node, self.other_node])
11483 # Step: check other node consistency
11484 self.lu.LogStep(2, steps_total, "Check peer consistency")
11485 self._CheckDisksConsistency(self.other_node,
11486 self.other_node == self.instance.primary_node,
11489 # Step: create new storage
11490 self.lu.LogStep(3, steps_total, "Allocate new storage")
11491 iv_names = self._CreateNewStorage(self.target_node)
11493 # Step: for each lv, detach+rename*2+attach
11494 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11495 for dev, old_lvs, new_lvs in iv_names.itervalues():
11496 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11498 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11500 result.Raise("Can't detach drbd from local storage on node"
11501 " %s for device %s" % (self.target_node, dev.iv_name))
11503 #cfg.Update(instance)
11505 # ok, we created the new LVs, so now we know we have the needed
11506 # storage; as such, we proceed on the target node to rename
11507 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11508 # using the assumption that logical_id == physical_id (which in
11509 # turn is the unique_id on that node)
11511 # FIXME(iustin): use a better name for the replaced LVs
11512 temp_suffix = int(time.time())
11513 ren_fn = lambda d, suff: (d.physical_id[0],
11514 d.physical_id[1] + "_replaced-%s" % suff)
11516 # Build the rename list based on what LVs exist on the node
11517 rename_old_to_new = []
11518 for to_ren in old_lvs:
11519 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11520 if not result.fail_msg and result.payload:
11522 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11524 self.lu.LogInfo("Renaming the old LVs on the target node")
11525 result = self.rpc.call_blockdev_rename(self.target_node,
11527 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11529 # Now we rename the new LVs to the old LVs
11530 self.lu.LogInfo("Renaming the new LVs on the target node")
11531 rename_new_to_old = [(new, old.physical_id)
11532 for old, new in zip(old_lvs, new_lvs)]
11533 result = self.rpc.call_blockdev_rename(self.target_node,
11535 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11537 # Intermediate steps of in memory modifications
11538 for old, new in zip(old_lvs, new_lvs):
11539 new.logical_id = old.logical_id
11540 self.cfg.SetDiskID(new, self.target_node)
11542 # We need to modify old_lvs so that removal later removes the
11543 # right LVs, not the newly added ones; note that old_lvs is a
11545 for disk in old_lvs:
11546 disk.logical_id = ren_fn(disk, temp_suffix)
11547 self.cfg.SetDiskID(disk, self.target_node)
11549 # Now that the new lvs have the old name, we can add them to the device
11550 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11551 result = self.rpc.call_blockdev_addchildren(self.target_node,
11552 (dev, self.instance), new_lvs)
11553 msg = result.fail_msg
11555 for new_lv in new_lvs:
11556 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11559 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11560 hint=("cleanup manually the unused logical"
11562 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11564 cstep = itertools.count(5)
11566 if self.early_release:
11567 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11568 self._RemoveOldStorage(self.target_node, iv_names)
11569 # TODO: Check if releasing locks early still makes sense
11570 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11572 # Release all resource locks except those used by the instance
11573 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11574 keep=self.node_secondary_ip.keys())
11576 # Release all node locks while waiting for sync
11577 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11579 # TODO: Can the instance lock be downgraded here? Take the optional disk
11580 # shutdown in the caller into consideration.
11583 # This can fail as the old devices are degraded and _WaitForSync
11584 # does a combined result over all disks, so we don't check its return value
11585 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11586 _WaitForSync(self.lu, self.instance)
11588 # Check all devices manually
11589 self._CheckDevices(self.instance.primary_node, iv_names)
11591 # Step: remove old storage
11592 if not self.early_release:
11593 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11594 self._RemoveOldStorage(self.target_node, iv_names)
11596 def _ExecDrbd8Secondary(self, feedback_fn):
11597 """Replace the secondary node for DRBD 8.
11599 The algorithm for replace is quite complicated:
11600 - for all disks of the instance:
11601 - create new LVs on the new node with same names
11602 - shutdown the drbd device on the old secondary
11603 - disconnect the drbd network on the primary
11604 - create the drbd device on the new secondary
11605 - network attach the drbd on the primary, using an artifice:
11606 the drbd code for Attach() will connect to the network if it
11607 finds a device which is connected to the good local disks but
11608 not network enabled
11609 - wait for sync across all devices
11610 - remove all disks from the old secondary
11612 Failures are not very well handled.
11617 pnode = self.instance.primary_node
11619 # Step: check device activation
11620 self.lu.LogStep(1, steps_total, "Check device existence")
11621 self._CheckDisksExistence([self.instance.primary_node])
11622 self._CheckVolumeGroup([self.instance.primary_node])
11624 # Step: check other node consistency
11625 self.lu.LogStep(2, steps_total, "Check peer consistency")
11626 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11628 # Step: create new storage
11629 self.lu.LogStep(3, steps_total, "Allocate new storage")
11630 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11631 for idx, dev in enumerate(disks):
11632 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11633 (self.new_node, idx))
11634 # we pass force_create=True to force LVM creation
11635 for new_lv in dev.children:
11636 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11637 True, _GetInstanceInfoText(self.instance), False)
11639 # Step 4: drbd minors and drbd setup changes
11640 # after this, we must manually remove the drbd minors on both the
11641 # error and the success paths
11642 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11643 minors = self.cfg.AllocateDRBDMinor([self.new_node
11644 for dev in self.instance.disks],
11645 self.instance.name)
11646 logging.debug("Allocated minors %r", minors)
11649 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11650 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11651 (self.new_node, idx))
11652 # create new devices on new_node; note that we create two IDs:
11653 # one without port, so the drbd will be activated without
11654 # networking information on the new node at this stage, and one
11655 # with network, for the latter activation in step 4
11656 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11657 if self.instance.primary_node == o_node1:
11660 assert self.instance.primary_node == o_node2, "Three-node instance?"
11663 new_alone_id = (self.instance.primary_node, self.new_node, None,
11664 p_minor, new_minor, o_secret)
11665 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11666 p_minor, new_minor, o_secret)
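# Illustrative example with made-up values: for a DRBD8 disk whose current
# logical_id is
#   ("node1.example.com", "oldsec.example.com", 11000, 0, 3, "secret")
# and whose primary is the first entry, p_minor is the primary's minor (0
# here) and the tuples built above would be roughly
#   new_alone_id = (primary, new_node, None,  0, new_minor, "secret")
#   new_net_id   = (primary, new_node, 11000, 0, new_minor, "secret")
# i.e. the port and shared secret are kept; only the peer node and its minor
# change.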
11668 iv_names[idx] = (dev, dev.children, new_net_id)
11669 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11671 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11672 logical_id=new_alone_id,
11673 children=dev.children,
11676 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11679 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11681 _GetInstanceInfoText(self.instance), False)
11682 except errors.GenericError:
11683 self.cfg.ReleaseDRBDMinors(self.instance.name)
11686 # We have new devices, shutdown the drbd on the old secondary
11687 for idx, dev in enumerate(self.instance.disks):
11688 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11689 self.cfg.SetDiskID(dev, self.target_node)
11690 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11691 (dev, self.instance)).fail_msg
11693 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11694 " node: %s" % (idx, msg),
11695 hint=("Please cleanup this device manually as"
11696 " soon as possible"))
11698 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11699 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11700 self.instance.disks)[pnode]
11702 msg = result.fail_msg
11704 # detaches didn't succeed (unlikely)
11705 self.cfg.ReleaseDRBDMinors(self.instance.name)
11706 raise errors.OpExecError("Can't detach the disks from the network on"
11707 " old node: %s" % (msg,))
11709 # if we managed to detach at least one, we update all the disks of
11710 # the instance to point to the new secondary
11711 self.lu.LogInfo("Updating instance configuration")
11712 for dev, _, new_logical_id in iv_names.itervalues():
11713 dev.logical_id = new_logical_id
11714 self.cfg.SetDiskID(dev, self.instance.primary_node)
11716 self.cfg.Update(self.instance, feedback_fn)
11718 # Release all node locks (the configuration has been updated)
11719 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11721 # and now perform the drbd attach
11722 self.lu.LogInfo("Attaching primary drbds to new secondary"
11723 " (standalone => connected)")
11724 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11726 self.node_secondary_ip,
11727 (self.instance.disks, self.instance),
11728 self.instance.name,
11730 for to_node, to_result in result.items():
11731 msg = to_result.fail_msg
11733 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11735 hint=("please do a gnt-instance info to see the"
11736 " status of disks"))
11738 cstep = itertools.count(5)
11740 if self.early_release:
11741 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11742 self._RemoveOldStorage(self.target_node, iv_names)
11743 # TODO: Check if releasing locks early still makes sense
11744 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11746 # Release all resource locks except those used by the instance
11747 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11748 keep=self.node_secondary_ip.keys())
11750 # TODO: Can the instance lock be downgraded here? Take the optional disk
11751 # shutdown in the caller into consideration.
11754 # This can fail as the old devices are degraded and _WaitForSync
11755 # does a combined result over all disks, so we don't check its return value
11756 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11757 _WaitForSync(self.lu, self.instance)
11759 # Check all devices manually
11760 self._CheckDevices(self.instance.primary_node, iv_names)
11762 # Step: remove old storage
11763 if not self.early_release:
11764 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11765 self._RemoveOldStorage(self.target_node, iv_names)
11768 class LURepairNodeStorage(NoHooksLU):
11769 """Repairs the volume group on a node.
11774 def CheckArguments(self):
11775 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11777 storage_type = self.op.storage_type
11779 if (constants.SO_FIX_CONSISTENCY not in
11780 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11781 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11782 " repaired" % storage_type,
11783 errors.ECODE_INVAL)
11785 def ExpandNames(self):
11786 self.needed_locks = {
11787 locking.LEVEL_NODE: [self.op.node_name],
11790 def _CheckFaultyDisks(self, instance, node_name):
11791 """Ensure faulty disks abort the opcode or at least warn."""
11793 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11795 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11796 " node '%s'" % (instance.name, node_name),
11797 errors.ECODE_STATE)
11798 except errors.OpPrereqError, err:
11799 if self.op.ignore_consistency:
11800 self.proc.LogWarning(str(err.args[0]))
11804 def CheckPrereq(self):
11805 """Check prerequisites.
11808 # Check whether any instance on this node has faulty disks
11809 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11810 if inst.admin_state != constants.ADMINST_UP:
11812 check_nodes = set(inst.all_nodes)
11813 check_nodes.discard(self.op.node_name)
11814 for inst_node_name in check_nodes:
11815 self._CheckFaultyDisks(inst, inst_node_name)
11817 def Exec(self, feedback_fn):
11818 feedback_fn("Repairing storage unit '%s' on %s ..." %
11819 (self.op.name, self.op.node_name))
11821 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11822 result = self.rpc.call_storage_execute(self.op.node_name,
11823 self.op.storage_type, st_args,
11825 constants.SO_FIX_CONSISTENCY)
11826 result.Raise("Failed to repair storage unit '%s' on %s" %
11827 (self.op.name, self.op.node_name))
11830 class LUNodeEvacuate(NoHooksLU):
11831 """Evacuates instances off a list of nodes.
11836 _MODE2IALLOCATOR = {
11837 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11838 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11839 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11841 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11842 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11843 constants.IALLOCATOR_NEVAC_MODES)
11845 def CheckArguments(self):
11846 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11848 def ExpandNames(self):
11849 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11851 if self.op.remote_node is not None:
11852 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11853 assert self.op.remote_node
11855 if self.op.remote_node == self.op.node_name:
11856 raise errors.OpPrereqError("Can not use evacuated node as a new"
11857 " secondary node", errors.ECODE_INVAL)
11859 if self.op.mode != constants.NODE_EVAC_SEC:
11860 raise errors.OpPrereqError("Without the use of an iallocator only"
11861 " secondary instances can be evacuated",
11862 errors.ECODE_INVAL)
11865 self.share_locks = _ShareAll()
11866 self.needed_locks = {
11867 locking.LEVEL_INSTANCE: [],
11868 locking.LEVEL_NODEGROUP: [],
11869 locking.LEVEL_NODE: [],
11872 # Determine nodes (via group) optimistically, needs verification once locks
11873 # have been acquired
11874 self.lock_nodes = self._DetermineNodes()
11876 def _DetermineNodes(self):
11877 """Gets the list of nodes to operate on.
11880 if self.op.remote_node is None:
11881 # Iallocator will choose any node(s) in the same group
11882 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11884 group_nodes = frozenset([self.op.remote_node])
11886 # Determine nodes to be locked
11887 return set([self.op.node_name]) | group_nodes
11889 def _DetermineInstances(self):
11890 """Builds list of instances to operate on.
11893 assert self.op.mode in constants.NODE_EVAC_MODES
11895 if self.op.mode == constants.NODE_EVAC_PRI:
11896 # Primary instances only
11897 inst_fn = _GetNodePrimaryInstances
11898 assert self.op.remote_node is None, \
11899 "Evacuating primary instances requires iallocator"
11900 elif self.op.mode == constants.NODE_EVAC_SEC:
11901 # Secondary instances only
11902 inst_fn = _GetNodeSecondaryInstances
11905 assert self.op.mode == constants.NODE_EVAC_ALL
11906 inst_fn = _GetNodeInstances
11907 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11909 raise errors.OpPrereqError("Due to an issue with the iallocator"
11910 " interface it is not possible to evacuate"
11911 " all instances at once; specify explicitly"
11912 " whether to evacuate primary or secondary"
11914 errors.ECODE_INVAL)
11916 return inst_fn(self.cfg, self.op.node_name)
11918 def DeclareLocks(self, level):
11919 if level == locking.LEVEL_INSTANCE:
11920 # Lock instances optimistically, needs verification once node and group
11921 # locks have been acquired
11922 self.needed_locks[locking.LEVEL_INSTANCE] = \
11923 set(i.name for i in self._DetermineInstances())
11925 elif level == locking.LEVEL_NODEGROUP:
11926 # Lock node groups for all potential target nodes optimistically, needs
11927 # verification once nodes have been acquired
11928 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11929 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11931 elif level == locking.LEVEL_NODE:
11932 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11934 def CheckPrereq(self):
11936 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11937 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11938 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11940 need_nodes = self._DetermineNodes()
11942 if not owned_nodes.issuperset(need_nodes):
11943 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11944 " locks were acquired, current nodes are"
11945 " '%s', used to be '%s'; retry the"
11947 (self.op.node_name,
11948 utils.CommaJoin(need_nodes),
11949 utils.CommaJoin(owned_nodes)),
11950 errors.ECODE_STATE)
11952 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11953 if owned_groups != wanted_groups:
11954 raise errors.OpExecError("Node groups changed since locks were acquired,"
11955 " current groups are '%s', used to be '%s';"
11956 " retry the operation" %
11957 (utils.CommaJoin(wanted_groups),
11958 utils.CommaJoin(owned_groups)))
11960 # Determine affected instances
11961 self.instances = self._DetermineInstances()
11962 self.instance_names = [i.name for i in self.instances]
11964 if set(self.instance_names) != owned_instances:
11965 raise errors.OpExecError("Instances on node '%s' changed since locks"
11966 " were acquired, current instances are '%s',"
11967 " used to be '%s'; retry the operation" %
11968 (self.op.node_name,
11969 utils.CommaJoin(self.instance_names),
11970 utils.CommaJoin(owned_instances)))
11972 if self.instance_names:
11973 self.LogInfo("Evacuating instances from node '%s': %s",
11975 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11977 self.LogInfo("No instances to evacuate from node '%s'",
11980 if self.op.remote_node is not None:
11981 for i in self.instances:
11982 if i.primary_node == self.op.remote_node:
11983 raise errors.OpPrereqError("Node %s is the primary node of"
11984 " instance %s, cannot use it as"
11986 (self.op.remote_node, i.name),
11987 errors.ECODE_INVAL)
11989 def Exec(self, feedback_fn):
11990 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11992 if not self.instance_names:
11993 # No instances to evacuate
11996 elif self.op.iallocator is not None:
11997 # TODO: Implement relocation to other group
11998 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11999 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12000 instances=list(self.instance_names))
12001 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12003 ial.Run(self.op.iallocator)
12005 if not ial.success:
12006 raise errors.OpPrereqError("Can't compute node evacuation using"
12007 " iallocator '%s': %s" %
12008 (self.op.iallocator, ial.info),
12009 errors.ECODE_NORES)
12011 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12013 elif self.op.remote_node is not None:
12014 assert self.op.mode == constants.NODE_EVAC_SEC
12016 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12017 remote_node=self.op.remote_node,
12019 mode=constants.REPLACE_DISK_CHG,
12020 early_release=self.op.early_release)]
12021 for instance_name in self.instance_names]
12024 raise errors.ProgrammerError("No iallocator or remote node")
12026 return ResultWithJobs(jobs)
12029 def _SetOpEarlyRelease(early_release, op):
12030 """Sets C{early_release} flag on opcodes if available.
12033 try:
12034 op.early_release = early_release
12035 except AttributeError:
12036 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12041 def _NodeEvacDest(use_nodes, group, nodes):
12042 """Returns group or nodes depending on caller's choice.
12046 return utils.CommaJoin(nodes)
12051 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12052 """Unpacks the result of change-group and node-evacuate iallocator requests.
12054 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12055 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12057 @type lu: L{LogicalUnit}
12058 @param lu: Logical unit instance
12059 @type alloc_result: tuple/list
12060 @param alloc_result: Result from iallocator
12061 @type early_release: bool
12062 @param early_release: Whether to release locks early if possible
12063 @type use_nodes: bool
12064 @param use_nodes: Whether to display node names instead of groups
12067 (moved, failed, jobs) = alloc_result
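# For illustration (made-up values), alloc_result is expected to look roughly
# like:
#   moved  = [("inst1", "group1", ["node3", "node4"])]
#   failed = [("inst2", "not enough memory on candidate nodes")]
#   jobs   = [[<serialized opcode dict>, ...], ...]
# "failed" entries only produce the warning/error below, "moved" is only used
# for logging, and each element of "jobs" is deserialized into opcodes via
# opcodes.OpCode.LoadOpCode further down.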
12070 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12071 for (name, reason) in failed)
12072 lu.LogWarning("Unable to evacuate instances %s", failreason)
12073 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12076 lu.LogInfo("Instances to be moved: %s",
12077 utils.CommaJoin("%s (to %s)" %
12078 (name, _NodeEvacDest(use_nodes, group, nodes))
12079 for (name, group, nodes) in moved))
12081 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12082 map(opcodes.OpCode.LoadOpCode, ops))
12086 def _DiskSizeInBytesToMebibytes(lu, size):
12087 """Converts a disk size in bytes to mebibytes.
12089 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12092 (mib, remainder) = divmod(size, 1024 * 1024)
12095 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12096 " to not overwrite existing data (%s bytes will not be"
12097 " wiped)", (1024 * 1024) - remainder)
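# Worked example (illustrative): for size = 1024 * 1024 * 1024 + 512 bytes,
# divmod() yields mib = 1024 and remainder = 512, so the warning above fires
# (1048064 bytes will not be wiped) and, per the docstring, the result is
# rounded up to 1025 MiB.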
12103 class LUInstanceGrowDisk(LogicalUnit):
12104 """Grow a disk of an instance.
12107 HPATH = "disk-grow"
12108 HTYPE = constants.HTYPE_INSTANCE
12111 def ExpandNames(self):
12112 self._ExpandAndLockInstance()
12113 self.needed_locks[locking.LEVEL_NODE] = []
12114 self.needed_locks[locking.LEVEL_NODE_RES] = []
12115 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12116 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12118 def DeclareLocks(self, level):
12119 if level == locking.LEVEL_NODE:
12120 self._LockInstancesNodes()
12121 elif level == locking.LEVEL_NODE_RES:
12123 self.needed_locks[locking.LEVEL_NODE_RES] = \
12124 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12126 def BuildHooksEnv(self):
12127 """Build hooks env.
12129 This runs on the master, the primary and all the secondaries.
12133 "DISK": self.op.disk,
12134 "AMOUNT": self.op.amount,
12135 "ABSOLUTE": self.op.absolute,
12137 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12140 def BuildHooksNodes(self):
12141 """Build hooks nodes.
12144 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12147 def CheckPrereq(self):
12148 """Check prerequisites.
12150 This checks that the instance is in the cluster.
12153 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12154 assert instance is not None, \
12155 "Cannot retrieve locked instance %s" % self.op.instance_name
12156 nodenames = list(instance.all_nodes)
12157 for node in nodenames:
12158 _CheckNodeOnline(self, node)
12160 self.instance = instance
12162 if instance.disk_template not in constants.DTS_GROWABLE:
12163 raise errors.OpPrereqError("Instance's disk layout does not support"
12164 " growing", errors.ECODE_INVAL)
12166 self.disk = instance.FindDisk(self.op.disk)
12168 if self.op.absolute:
12169 self.target = self.op.amount
12170 self.delta = self.target - self.disk.size
12172 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12173 "current disk size (%s)" %
12174 (utils.FormatUnit(self.target, "h"),
12175 utils.FormatUnit(self.disk.size, "h")),
12176 errors.ECODE_STATE)
12178 self.delta = self.op.amount
12179 self.target = self.disk.size + self.delta
12181 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12182 utils.FormatUnit(self.delta, "h"),
12183 errors.ECODE_INVAL)
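# Worked example (illustrative, sizes in MiB): for a disk of size 2048,
#   absolute=False, amount=1024  ->  delta = 1024, target = 3072
#   absolute=True,  amount=4096  ->  target = 4096, delta = 2048
# while absolute=True with amount=1024 fails the "smaller than current disk
# size" check above, and absolute=False with a negative amount fails the
# negative-increment check.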
12185 if instance.disk_template not in (constants.DT_FILE,
12186 constants.DT_SHARED_FILE,
12188 # TODO: check the free disk space for file, when that feature will be
12190 _CheckNodesFreeDiskPerVG(self, nodenames,
12191 self.disk.ComputeGrowth(self.delta))
12193 def Exec(self, feedback_fn):
12194 """Execute disk grow.
12197 instance = self.instance
12200 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12201 assert (self.owned_locks(locking.LEVEL_NODE) ==
12202 self.owned_locks(locking.LEVEL_NODE_RES))
12204 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12206 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12208 raise errors.OpExecError("Cannot activate block device to grow")
12210 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12211 (self.op.disk, instance.name,
12212 utils.FormatUnit(self.delta, "h"),
12213 utils.FormatUnit(self.target, "h")))
12215 # First run all grow ops in dry-run mode
12216 for node in instance.all_nodes:
12217 self.cfg.SetDiskID(disk, node)
12218 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12220 result.Raise("Dry-run grow request failed to node %s" % node)
12223 # Get disk size from primary node for wiping
12224 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12225 result.Raise("Failed to retrieve disk size from node '%s'" %
12226 instance.primary_node)
12228 (disk_size_in_bytes, ) = result.payload
12230 if disk_size_in_bytes is None:
12231 raise errors.OpExecError("Failed to retrieve disk size from primary"
12232 " node '%s'" % instance.primary_node)
12234 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12236 assert old_disk_size >= disk.size, \
12237 ("Retrieved disk size too small (got %s, should be at least %s)" %
12238 (old_disk_size, disk.size))
12240 old_disk_size = None
12242 # We know that (as far as we can test) operations across different
12243 # nodes will succeed, time to run it for real on the backing storage
12244 for node in instance.all_nodes:
12245 self.cfg.SetDiskID(disk, node)
12246 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12248 result.Raise("Grow request failed to node %s" % node)
12250 # And now execute it for logical storage, on the primary node
12251 node = instance.primary_node
12252 self.cfg.SetDiskID(disk, node)
12253 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12255 result.Raise("Grow request failed to node %s" % node)
12257 disk.RecordGrow(self.delta)
12258 self.cfg.Update(instance, feedback_fn)
12260 # Changes have been recorded, release node lock
12261 _ReleaseLocks(self, locking.LEVEL_NODE)
12263 # Downgrade lock while waiting for sync
12264 self.glm.downgrade(locking.LEVEL_INSTANCE)
12266 assert wipe_disks ^ (old_disk_size is None)
12269 assert instance.disks[self.op.disk] == disk
12271 # Wipe newly added disk space
12272 _WipeDisks(self, instance,
12273 disks=[(self.op.disk, disk, old_disk_size)])
12275 if self.op.wait_for_sync:
12276 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12278 self.proc.LogWarning("Disk sync-ing has not returned a good"
12279 " status; please check the instance")
12280 if instance.admin_state != constants.ADMINST_UP:
12281 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12282 elif instance.admin_state != constants.ADMINST_UP:
12283 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12284 " not supposed to be running because no wait for"
12285 " sync mode was requested")
12287 assert self.owned_locks(locking.LEVEL_NODE_RES)
12288 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12291 class LUInstanceQueryData(NoHooksLU):
12292 """Query runtime instance data.
12297 def ExpandNames(self):
12298 self.needed_locks = {}
12300 # Use locking if requested or when non-static information is wanted
12301 if not (self.op.static or self.op.use_locking):
12302 self.LogWarning("Non-static data requested, locks need to be acquired")
12303 self.op.use_locking = True
12305 if self.op.instances or not self.op.use_locking:
12306 # Expand instance names right here
12307 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12309 # Will use acquired locks
12310 self.wanted_names = None
12312 if self.op.use_locking:
12313 self.share_locks = _ShareAll()
12315 if self.wanted_names is None:
12316 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12318 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12320 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12321 self.needed_locks[locking.LEVEL_NODE] = []
12322 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12324 def DeclareLocks(self, level):
12325 if self.op.use_locking:
12326 if level == locking.LEVEL_NODEGROUP:
12327 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12329 # Lock all groups used by instances optimistically; this requires going
12330 # via the node before it's locked, requiring verification later on
12331 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12332 frozenset(group_uuid
12333 for instance_name in owned_instances
12335 self.cfg.GetInstanceNodeGroups(instance_name))
12337 elif level == locking.LEVEL_NODE:
12338 self._LockInstancesNodes()
12340 def CheckPrereq(self):
12341 """Check prerequisites.
12343 This only checks the optional instance list against the existing names.
12346 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12347 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12348 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12350 if self.wanted_names is None:
12351 assert self.op.use_locking, "Locking was not used"
12352 self.wanted_names = owned_instances
12354 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12356 if self.op.use_locking:
12357 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12360 assert not (owned_instances or owned_groups or owned_nodes)
12362 self.wanted_instances = instances.values()
12364 def _ComputeBlockdevStatus(self, node, instance, dev):
12365 """Returns the status of a block device
12368 if self.op.static or not node:
12371 self.cfg.SetDiskID(dev, node)
12373 result = self.rpc.call_blockdev_find(node, dev)
12377 result.Raise("Can't compute disk status for %s" % instance.name)
12379 status = result.payload
12383 return (status.dev_path, status.major, status.minor,
12384 status.sync_percent, status.estimated_time,
12385 status.is_degraded, status.ldisk_status)
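# The tuple above is the per-node device status. An illustrative, made-up
# value (the exact ldisk status constant is an assumption here):
#   ("/dev/drbd0", 147, 0, 99.7, 10, False, constants.LDS_OKAY)
# matching (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status).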
12387 def _ComputeDiskStatus(self, instance, snode, dev):
12388 """Compute block device status.
12391 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12393 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12395 def _ComputeDiskStatusInner(self, instance, snode, dev):
12396 """Compute block device status.
12398 @attention: The device has to be annotated already.
12401 if dev.dev_type in constants.LDS_DRBD:
12402 # we change the snode then (otherwise we use the one passed in)
12403 if dev.logical_id[0] == instance.primary_node:
12404 snode = dev.logical_id[1]
12406 snode = dev.logical_id[0]
12408 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12410 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12413 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12420 "iv_name": dev.iv_name,
12421 "dev_type": dev.dev_type,
12422 "logical_id": dev.logical_id,
12423 "physical_id": dev.physical_id,
12424 "pstatus": dev_pstatus,
12425 "sstatus": dev_sstatus,
12426 "children": dev_children,
12431 def Exec(self, feedback_fn):
12432 """Gather and return data"""
12435 cluster = self.cfg.GetClusterInfo()
12437 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12438 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12440 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12441 for node in nodes.values()))
12443 group2name_fn = lambda uuid: groups[uuid].name
12445 for instance in self.wanted_instances:
12446 pnode = nodes[instance.primary_node]
12448 if self.op.static or pnode.offline:
12449 remote_state = None
12451 self.LogWarning("Primary node %s is marked offline, returning static"
12452 " information only for instance %s" %
12453 (pnode.name, instance.name))
12455 remote_info = self.rpc.call_instance_info(instance.primary_node,
12457 instance.hypervisor)
12458 remote_info.Raise("Error checking node %s" % instance.primary_node)
12459 remote_info = remote_info.payload
12460 if remote_info and "state" in remote_info:
12461 remote_state = "up"
12463 if instance.admin_state == constants.ADMINST_UP:
12464 remote_state = "down"
12466 remote_state = instance.admin_state
12468 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12471 snodes_group_uuids = [nodes[snode_name].group
12472 for snode_name in instance.secondary_nodes]
12474 result[instance.name] = {
12475 "name": instance.name,
12476 "config_state": instance.admin_state,
12477 "run_state": remote_state,
12478 "pnode": instance.primary_node,
12479 "pnode_group_uuid": pnode.group,
12480 "pnode_group_name": group2name_fn(pnode.group),
12481 "snodes": instance.secondary_nodes,
12482 "snodes_group_uuids": snodes_group_uuids,
12483 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12485 # this happens to be the same format used for hooks
12486 "nics": _NICListToTuple(self, instance.nics),
12487 "disk_template": instance.disk_template,
12489 "hypervisor": instance.hypervisor,
12490 "network_port": instance.network_port,
12491 "hv_instance": instance.hvparams,
12492 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12493 "be_instance": instance.beparams,
12494 "be_actual": cluster.FillBE(instance),
12495 "os_instance": instance.osparams,
12496 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12497 "serial_no": instance.serial_no,
12498 "mtime": instance.mtime,
12499 "ctime": instance.ctime,
12500 "uuid": instance.uuid,
12506 def PrepareContainerMods(mods, private_fn):
12507 """Prepares a list of container modifications by adding a private data field.
12509 @type mods: list of tuples; (operation, index, parameters)
12510 @param mods: List of modifications
12511 @type private_fn: callable or None
12512 @param private_fn: Callable for constructing a private data field for a
12517 if private_fn is None:
12522 return [(op, idx, params, fn()) for (op, idx, params) in mods]
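# Illustrative usage sketch for this helper together with ApplyContainerMods
# (defined below); the modification list and callbacks are hypothetical:
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})],
#                               None)
#   chgdesc = []
#   ApplyContainerMods("disk", container, chgdesc, mods,
#                      create_fn, modify_fn, remove_fn)
# After the call, "container" holds the new item and "chgdesc" the list of
# two-element change descriptions produced by the callbacks.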
12525 #: Type description for changes as returned by L{ApplyContainerMods}'s
12527 _TApplyContModsCbChanges = \
12528 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12529 ht.TNonEmptyString,
12534 def ApplyContainerMods(kind, container, chgdesc, mods,
12535 create_fn, modify_fn, remove_fn):
12536 """Applies descriptions in C{mods} to C{container}.
12539 @param kind: One-word item description
12540 @type container: list
12541 @param container: Container to modify
12542 @type chgdesc: None or list
12543 @param chgdesc: List of applied changes
12545 @param mods: Modifications as returned by L{PrepareContainerMods}
12546 @type create_fn: callable
12547 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12548 receives absolute item index, parameters and private data object as added
12549 by L{PrepareContainerMods}, returns tuple containing new item and changes
12551 @type modify_fn: callable
12552 @param modify_fn: Callback for modifying an existing item
12553 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12554 and private data object as added by L{PrepareContainerMods}, returns
12556 @type remove_fn: callable
12557 @param remove_fn: Callback on removing item; receives absolute item index,
12558 item and private data object as added by L{PrepareContainerMods}
12561 for (op, idx, params, private) in mods:
12564 absidx = len(container) - 1
12566 raise IndexError("Not accepting negative indices other than -1")
12567 elif idx > len(container):
12568 raise IndexError("Got %s index %s, but there are only %s" %
12569 (kind, idx, len(container)))
12575 if op == constants.DDM_ADD:
12576 # Calculate where item will be added
12578 addidx = len(container)
12582 if create_fn is None:
12585 (item, changes) = create_fn(addidx, params, private)
12588 container.append(item)
12591 assert idx <= len(container)
12592 # list.insert does so before the specified index
12593 container.insert(idx, item)
12595 # Retrieve existing item
12597 item = container[absidx]
12599 raise IndexError("Invalid %s index %s" % (kind, idx))
12601 if op == constants.DDM_REMOVE:
12604 if remove_fn is not None:
12605 remove_fn(absidx, item, private)
12607 changes = [("%s/%s" % (kind, absidx), "remove")]
12609 assert container[absidx] == item
12610 del container[absidx]
12611 elif op == constants.DDM_MODIFY:
12612 if modify_fn is not None:
12613 changes = modify_fn(absidx, item, params, private)
12615 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12617 assert _TApplyContModsCbChanges(changes)
12619 if not (chgdesc is None or changes is None):
12620 chgdesc.extend(changes)
12623 def _UpdateIvNames(base_index, disks):
12624 """Updates the C{iv_name} attribute of disks.
12626 @type disks: list of L{objects.Disk}
12629 for (idx, disk) in enumerate(disks):
12630 disk.iv_name = "disk/%s" % (base_index + idx, )
12633 class _InstNicModPrivate:
12634 """Data structure for network interface modifications.
12636 Used by L{LUInstanceSetParams}.
12639 def __init__(self):
12644 class LUInstanceSetParams(LogicalUnit):
12645 """Modifies an instance's parameters.
12648 HPATH = "instance-modify"
12649 HTYPE = constants.HTYPE_INSTANCE
12653 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12654 assert ht.TList(mods)
12655 assert not mods or len(mods[0]) in (2, 3)
12657 if mods and len(mods[0]) == 2:
12661 for op, params in mods:
12662 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12663 result.append((op, -1, params))
12667 raise errors.OpPrereqError("Only one %s add or remove operation is"
12668 " supported at a time" % kind,
12669 errors.ECODE_INVAL)
12671 result.append((constants.DDM_MODIFY, op, params))
12673 assert verify_fn(result)
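# Illustrative examples (hypothetical values) of the upgrade performed above,
# from the old two-element format to the new (op, index, params) format:
#   [(constants.DDM_ADD, {"size": 1024})]  ->  [(constants.DDM_ADD, -1,
#                                                {"size": 1024})]
#   [(0, {"mode": "ro"})]                  ->  [(constants.DDM_MODIFY, 0,
#                                                {"mode": "ro"})]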
12680 def _CheckMods(kind, mods, key_types, item_fn):
12681 """Ensures requested disk/NIC modifications are valid.
12684 for (op, _, params) in mods:
12685 assert ht.TDict(params)
12687 utils.ForceDictType(params, key_types)
12689 if op == constants.DDM_REMOVE:
12691 raise errors.OpPrereqError("No settings should be passed when"
12692 " removing a %s" % kind,
12693 errors.ECODE_INVAL)
12694 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12695 item_fn(op, params)
12697 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12700 def _VerifyDiskModification(op, params):
12701 """Verifies a disk modification.
12704 if op == constants.DDM_ADD:
12705 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12706 if mode not in constants.DISK_ACCESS_SET:
12707 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12708 errors.ECODE_INVAL)
12710 size = params.get(constants.IDISK_SIZE, None)
12712 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12713 constants.IDISK_SIZE, errors.ECODE_INVAL)
12717 except (TypeError, ValueError), err:
12718 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12719 errors.ECODE_INVAL)
12721 params[constants.IDISK_SIZE] = size
12723 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12724 raise errors.OpPrereqError("Disk size change not possible, use"
12725 " grow-disk", errors.ECODE_INVAL)
12728 def _VerifyNicModification(op, params):
12729 """Verifies a network interface modification.
12732 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12733 ip = params.get(constants.INIC_IP, None)
12734 req_net = params.get(constants.INIC_NETWORK, None)
12735 link = params.get(constants.NIC_LINK, None)
12736 mode = params.get(constants.NIC_MODE, None)
12737 if req_net is not None:
12738 if req_net.lower() == constants.VALUE_NONE:
12739 params[constants.INIC_NETWORK] = None
12741 elif link is not None or mode is not None:
12742 raise errors.OpPrereqError("If network is given"
12743 ", mode or link should not be given",
12744 errors.ECODE_INVAL)
12746 if op == constants.DDM_ADD:
12747 macaddr = params.get(constants.INIC_MAC, None)
12748 if macaddr is None:
12749 params[constants.INIC_MAC] = constants.VALUE_AUTO
12752 if ip.lower() == constants.VALUE_NONE:
12753 params[constants.INIC_IP] = None
12755 if ip.lower() == constants.NIC_IP_POOL:
12756 if op == constants.DDM_ADD and req_net is None:
12757 raise errors.OpPrereqError("If ip=pool, parameter network"
12758 " must be passed too",
12759 errors.ECODE_INVAL)
12761 if not netutils.IPAddress.IsValid(ip):
12762 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12763 errors.ECODE_INVAL)
12765 if constants.INIC_MAC in params:
12766 macaddr = params[constants.INIC_MAC]
12767 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12768 macaddr = utils.NormalizeAndValidateMac(macaddr)
12770 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12771 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12772 " modifying an existing NIC",
12773 errors.ECODE_INVAL)
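# Illustrative parameter dictionaries (values are hypothetical) that pass the
# checks above:
#   adding a NIC from an IP pool (a network is then required):
#     {constants.INIC_IP: constants.NIC_IP_POOL,
#      constants.INIC_NETWORK: "net1"}
#   modifying an existing NIC's MAC address ("auto" would be rejected here):
#     {constants.INIC_MAC: "aa:00:00:12:34:56"}
# Passing a real network together with a link or mode raises OpPrereqError,
# as does ip=pool on an add without a network.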
12775 def CheckArguments(self):
12776 if not (self.op.nics or self.op.disks or self.op.disk_template or
12777 self.op.hvparams or self.op.beparams or self.op.os_name or
12778 self.op.offline is not None or self.op.runtime_mem):
12779 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12781 if self.op.hvparams:
12782 _CheckGlobalHvParams(self.op.hvparams)
12784 self.op.disks = self._UpgradeDiskNicMods(
12785 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12786 self.op.nics = self._UpgradeDiskNicMods(
12787 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12789 # Check disk modifications
12790 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12791 self._VerifyDiskModification)
12793 if self.op.disks and self.op.disk_template is not None:
12794 raise errors.OpPrereqError("Disk template conversion and other disk"
12795 " changes not supported at the same time",
12796 errors.ECODE_INVAL)
12798 if (self.op.disk_template and
12799 self.op.disk_template in constants.DTS_INT_MIRROR and
12800 self.op.remote_node is None):
12801 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12802 " one requires specifying a secondary node",
12803 errors.ECODE_INVAL)
12805 # Check NIC modifications
12806 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12807 self._VerifyNicModification)
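# Illustrative sketch (not part of the original module): after
# _UpgradeDiskNicMods, self.op.disks and self.op.nics are lists of
# (operation, identifier, params) tuples, roughly like the hypothetical
# examples below.
_EXAMPLE_DISK_MODS = [
  (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}), # append a 1024 MB disk
  (constants.DDM_REMOVE, 0, {}),                         # drop disk 0
  ]
_EXAMPLE_NIC_MODS = [
  (constants.DDM_MODIFY, 0, {constants.NIC_LINK: "br0"}),
  ]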
12809 def ExpandNames(self):
12810 self._ExpandAndLockInstance()
12811 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12812 # Can't even acquire node locks in shared mode as upcoming changes in
12813 # Ganeti 2.6 will start to modify the node object on disk conversion
12814 self.needed_locks[locking.LEVEL_NODE] = []
12815 self.needed_locks[locking.LEVEL_NODE_RES] = []
12816 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12817 # Lock the node group to look up the ipolicy
12818 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12820 def DeclareLocks(self, level):
12821 if level == locking.LEVEL_NODEGROUP:
12822 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12823 # Acquire locks for the instance's nodegroups optimistically. Needs
12824 # to be verified in CheckPrereq
12825 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12826 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12827 elif level == locking.LEVEL_NODE:
12828 self._LockInstancesNodes()
12829 if self.op.disk_template and self.op.remote_node:
12830 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12831 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12832 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12834 self.needed_locks[locking.LEVEL_NODE_RES] = \
12835 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12837 def BuildHooksEnv(self):
12838 """Build hooks env.
12840 This runs on the master, primary and secondaries.
12842 """
12843 args = {}
12844 if constants.BE_MINMEM in self.be_new:
12845 args["minmem"] = self.be_new[constants.BE_MINMEM]
12846 if constants.BE_MAXMEM in self.be_new:
12847 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12848 if constants.BE_VCPUS in self.be_new:
12849 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12850 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12851 # information at all.
12853 if self._new_nics is not None:
12854 nics = []
12856 for nic in self._new_nics:
12857 n = copy.deepcopy(nic)
12858 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12859 n.nicparams = nicparams
12860 nics.append(_NICToTuple(self, n))
12862 args["nics"] = nics
12864 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12865 if self.op.disk_template:
12866 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12867 if self.op.runtime_mem:
12868 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12872 def BuildHooksNodes(self):
12873 """Build hooks nodes.
12876 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12879 def _PrepareNicModification(self, params, private, old_ip, old_net,
12880 old_params, cluster, pnode):
12882 update_params_dict = dict([(key, params[key])
12883 for key in constants.NICS_PARAMETERS
12884 if key in params])
12886 req_link = update_params_dict.get(constants.NIC_LINK, None)
12887 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12889 new_net = params.get(constants.INIC_NETWORK, old_net)
12890 if new_net is not None:
12891 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12892 if netparams is None:
12893 raise errors.OpPrereqError("No netparams found for the network"
12894 " %s, propably not connected." % new_net,
12895 errors.ECODE_INVAL)
12896 new_params = dict(netparams)
12898 new_params = _GetUpdatedParams(old_params, update_params_dict)
12900 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12902 new_filled_params = cluster.SimpleFillNIC(new_params)
12903 objects.NIC.CheckParameterSyntax(new_filled_params)
12905 new_mode = new_filled_params[constants.NIC_MODE]
12906 if new_mode == constants.NIC_MODE_BRIDGED:
12907 bridge = new_filled_params[constants.NIC_LINK]
12908 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12909 if msg:
12910 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12911 if self.op.force:
12912 self.warn.append(msg)
12913 else:
12914 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12916 elif new_mode == constants.NIC_MODE_ROUTED:
12917 ip = params.get(constants.INIC_IP, old_ip)
12918 if ip is None:
12919 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12920 " on a routed NIC", errors.ECODE_INVAL)
12922 if constants.INIC_MAC in params:
12923 mac = params[constants.INIC_MAC]
12924 if mac is None:
12925 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12926 errors.ECODE_INVAL)
12927 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12928 # otherwise generate the MAC address
12929 params[constants.INIC_MAC] = \
12930 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12931 else:
12932 # or validate/reserve the current one
12933 try:
12934 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12935 except errors.ReservationError:
12936 raise errors.OpPrereqError("MAC address '%s' already in use"
12937 " in cluster" % mac,
12938 errors.ECODE_NOTUNIQUE)
12939 elif new_net != old_net:
12941 def get_net_prefix(net):
12942 if net:
12943 uuid = self.cfg.LookupNetwork(net)
12944 if uuid:
12945 nobj = self.cfg.GetNetwork(uuid)
12946 return nobj.mac_prefix
12947 return None
12949 new_prefix = get_net_prefix(new_net)
12950 old_prefix = get_net_prefix(old_net)
12951 if old_prefix != new_prefix:
12952 params[constants.INIC_MAC] = \
12953 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12955 # if there is a change in the NIC's IP/network configuration
12956 new_ip = params.get(constants.INIC_IP, old_ip)
12957 if (new_ip, new_net) != (old_ip, old_net):
12960 if new_ip.lower() == constants.NIC_IP_POOL:
12961 try:
12962 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12963 except errors.ReservationError:
12964 raise errors.OpPrereqError("Unable to get a free IP"
12965 " from the address pool",
12966 errors.ECODE_STATE)
12967 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12968 params[constants.INIC_IP] = new_ip
12969 elif new_ip != old_ip or new_net != old_net:
12970 try:
12971 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12972 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12973 except errors.ReservationError:
12974 raise errors.OpPrereqError("IP %s not available in network %s" %
12976 errors.ECODE_NOTUNIQUE)
12977 elif new_ip.lower() == constants.NIC_IP_POOL:
12978 raise errors.OpPrereqError("ip=pool, but no network found",
12979 errors.ECODE_INVAL)
12982 if self.op.conflicts_check:
12983 _CheckForConflictingIp(self, new_ip, pnode)
12987 try:
12988 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
12989 except errors.AddressPoolError:
12990 logging.warning("Release IP %s not contained in network %s",
12991 old_ip, old_net)
12993 # there are no changes in (net, ip) tuple
12994 elif (old_net is not None and
12995 (req_link is not None or req_mode is not None)):
12996 raise errors.OpPrereqError("Not allowed to change link or mode of"
12997 " a NIC that is connected to a network.",
12998 errors.ECODE_INVAL)
13000 private.params = new_params
13001 private.filled = new_filled_params
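# Illustrative sketch (not part of the original module): a rough,
# hypothetical equivalent of how _PrepareNicModification resolves the final
# NIC parameters. When the NIC is attached to a network, the node group's
# netparams take precedence; otherwise the per-NIC overrides are merged over
# the old values, and cluster defaults are filled in last.
def _SketchResolveNicParams(cluster, old_params, overrides, netparams=None):
  if netparams is not None:
    new_params = dict(netparams)
  else:
    new_params = dict(old_params)
    new_params.update(overrides)
  # cluster.SimpleFillNIC() supplies cluster-level defaults for missing keys
  return cluster.SimpleFillNIC(new_params)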
13003 def CheckPrereq(self):
13004 """Check prerequisites.
13006 This only checks the instance list against the existing names.
13009 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13010 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13012 cluster = self.cluster = self.cfg.GetClusterInfo()
13013 assert self.instance is not None, \
13014 "Cannot retrieve locked instance %s" % self.op.instance_name
13016 pnode = instance.primary_node
13017 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13018 nodelist = list(instance.all_nodes)
13019 pnode_info = self.cfg.GetNodeInfo(pnode)
13020 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13022 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13023 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13024 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13026 # dictionary with instance information after the modification
13027 ispec = {}
13029 # Prepare disk/NIC modifications
13030 self.diskmod = PrepareContainerMods(self.op.disks, None)
13031 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13034 if self.op.os_name and not self.op.force:
13035 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13036 self.op.force_variant)
13037 instance_os = self.op.os_name
13038 else:
13039 instance_os = instance.os
13041 assert not (self.op.disk_template and self.op.disks), \
13042 "Can't modify disk template and apply disk changes at the same time"
13044 if self.op.disk_template:
13045 if instance.disk_template == self.op.disk_template:
13046 raise errors.OpPrereqError("Instance already has disk template %s" %
13047 instance.disk_template, errors.ECODE_INVAL)
13049 if (instance.disk_template,
13050 self.op.disk_template) not in self._DISK_CONVERSIONS:
13051 raise errors.OpPrereqError("Unsupported disk template conversion from"
13052 " %s to %s" % (instance.disk_template,
13053 self.op.disk_template),
13054 errors.ECODE_INVAL)
13055 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13056 msg="cannot change disk template")
13057 if self.op.disk_template in constants.DTS_INT_MIRROR:
13058 if self.op.remote_node == pnode:
13059 raise errors.OpPrereqError("Given new secondary node %s is the same"
13060 " as the primary node of the instance" %
13061 self.op.remote_node, errors.ECODE_STATE)
13062 _CheckNodeOnline(self, self.op.remote_node)
13063 _CheckNodeNotDrained(self, self.op.remote_node)
13064 # FIXME: here we assume that the old instance type is DT_PLAIN
13065 assert instance.disk_template == constants.DT_PLAIN
13066 disks = [{constants.IDISK_SIZE: d.size,
13067 constants.IDISK_VG: d.logical_id[0]}
13068 for d in instance.disks]
13069 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13070 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13072 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13073 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13074 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13075 snode_group)
13076 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13077 ignore=self.op.ignore_ipolicy)
13078 if pnode_info.group != snode_info.group:
13079 self.LogWarning("The primary and secondary nodes are in two"
13080 " different node groups; the disk parameters"
13081 " from the first disk's node group will be"
13084 # hvparams processing
13085 if self.op.hvparams:
13086 hv_type = instance.hypervisor
13087 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13088 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13089 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13092 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13093 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13094 self.hv_proposed = self.hv_new = hv_new # the new actual values
13095 self.hv_inst = i_hvdict # the new dict (without defaults)
13096 else:
13097 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13098 instance.hvparams)
13099 self.hv_new = self.hv_inst = {}
13101 # beparams processing
13102 if self.op.beparams:
13103 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13104 use_none=True)
13105 objects.UpgradeBeParams(i_bedict)
13106 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13107 be_new = cluster.SimpleFillBE(i_bedict)
13108 self.be_proposed = self.be_new = be_new # the new actual values
13109 self.be_inst = i_bedict # the new dict (without defaults)
13110 else:
13111 self.be_new = self.be_inst = {}
13112 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13113 be_old = cluster.FillBE(instance)
13115 # CPU param validation -- checking every time a parameter is
13116 # changed to cover all cases where either CPU mask or vcpus have
13117 # been changed
13118 if (constants.BE_VCPUS in self.be_proposed and
13119 constants.HV_CPU_MASK in self.hv_proposed):
13120 cpu_list = \
13121 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13122 # Verify mask is consistent with number of vCPUs. Can skip this
13123 # test if only 1 entry in the CPU mask, which means same mask
13124 # is applied to all vCPUs.
13125 if (len(cpu_list) > 1 and
13126 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13127 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13129 (self.be_proposed[constants.BE_VCPUS],
13130 self.hv_proposed[constants.HV_CPU_MASK]),
13131 errors.ECODE_INVAL)
13133 # Only perform this test if a new CPU mask is given
13134 if constants.HV_CPU_MASK in self.hv_new:
13135 # Calculate the largest CPU number requested
13136 max_requested_cpu = max(map(max, cpu_list))
13137 # Check that all of the instance's nodes have enough physical CPUs to
13138 # satisfy the requested CPU mask
13139 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13140 max_requested_cpu + 1, instance.hypervisor)
13142 # osparams processing
13143 if self.op.osparams:
13144 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13145 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13146 self.os_inst = i_osdict # the new dict (without defaults)
13147 else:
13148 self.os_inst = {}
13150 self.warn = []
13152 #TODO(dynmem): do the appropriate check involving MINMEM
13153 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13154 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13155 mem_check_list = [pnode]
13156 if be_new[constants.BE_AUTO_BALANCE]:
13157 # either we changed auto_balance to yes or it was from before
13158 mem_check_list.extend(instance.secondary_nodes)
13159 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13160 instance.hypervisor)
13161 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13162 [instance.hypervisor])
13163 pninfo = nodeinfo[pnode]
13164 msg = pninfo.fail_msg
13165 if msg:
13166 # Assume the primary node is unreachable and go ahead
13167 self.warn.append("Can't get info from primary node %s: %s" %
13168 (pnode, msg))
13169 else:
13170 (_, _, (pnhvinfo, )) = pninfo.payload
13171 if not isinstance(pnhvinfo.get("memory_free", None), int):
13172 self.warn.append("Node data from primary node %s doesn't contain"
13173 " free memory information" % pnode)
13174 elif instance_info.fail_msg:
13175 self.warn.append("Can't get instance runtime information: %s" %
13176 instance_info.fail_msg)
13177 else:
13178 if instance_info.payload:
13179 current_mem = int(instance_info.payload["memory"])
13180 else:
13181 # Assume instance not running
13182 # (there is a slight race condition here, but it's not very
13183 # probable, and we have no other way to check)
13184 # TODO: Describe race condition
13185 current_mem = 0
13186 # TODO(dynmem): do the appropriate check involving MINMEM
13187 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13188 pnhvinfo["memory_free"])
13189 if miss_mem > 0:
13190 raise errors.OpPrereqError("This change will prevent the instance"
13191 " from starting, due to %d MB of memory"
13192 " missing on its primary node" %
13193 miss_mem, errors.ECODE_NORES)
13195 if be_new[constants.BE_AUTO_BALANCE]:
13196 for node, nres in nodeinfo.items():
13197 if node not in instance.secondary_nodes:
13198 continue
13199 nres.Raise("Can't get info from secondary node %s" % node,
13200 prereq=True, ecode=errors.ECODE_STATE)
13201 (_, _, (nhvinfo, )) = nres.payload
13202 if not isinstance(nhvinfo.get("memory_free", None), int):
13203 raise errors.OpPrereqError("Secondary node %s didn't return free"
13204 " memory information" % node,
13205 errors.ECODE_STATE)
13206 #TODO(dynmem): do the appropriate check involving MINMEM
13207 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13208 raise errors.OpPrereqError("This change will prevent the instance"
13209 " from failover to its secondary node"
13210 " %s, due to not enough memory" % node,
13211 errors.ECODE_STATE)
13213 if self.op.runtime_mem:
13214 remote_info = self.rpc.call_instance_info(instance.primary_node,
13215 instance.name,
13216 instance.hypervisor)
13217 remote_info.Raise("Error checking node %s" % instance.primary_node)
13218 if not remote_info.payload: # not running already
13219 raise errors.OpPrereqError("Instance %s is not running" %
13220 instance.name, errors.ECODE_STATE)
13222 current_memory = remote_info.payload["memory"]
13223 if (not self.op.force and
13224 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13225 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13226 raise errors.OpPrereqError("Instance %s must have memory between %d"
13227 " and %d MB of memory unless --force is"
13230 self.be_proposed[constants.BE_MINMEM],
13231 self.be_proposed[constants.BE_MAXMEM]),
13232 errors.ECODE_INVAL)
13234 delta = self.op.runtime_mem - current_memory
13235 if delta > 0:
13236 _CheckNodeFreeMemory(self, instance.primary_node,
13237 "ballooning memory for instance %s" %
13238 instance.name, delta, instance.hypervisor)
13240 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13241 raise errors.OpPrereqError("Disk operations not supported for"
13242 " diskless instances", errors.ECODE_INVAL)
13244 def _PrepareNicCreate(_, params, private):
13245 self._PrepareNicModification(params, private, None, None,
13246 {}, cluster, pnode)
13247 return (None, None)
13249 def _PrepareNicMod(_, nic, params, private):
13250 self._PrepareNicModification(params, private, nic.ip, nic.network,
13251 nic.nicparams, cluster, pnode)
13252 return None
13254 def _PrepareNicRemove(_, params, __):
13255 ip = params.ip
13256 net = params.network
13257 if net is not None and ip is not None:
13258 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13260 # Verify NIC changes (operating on copy)
13261 nics = instance.nics[:]
13262 ApplyContainerMods("NIC", nics, None, self.nicmod,
13263 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13264 if len(nics) > constants.MAX_NICS:
13265 raise errors.OpPrereqError("Instance has too many network interfaces"
13266 " (%d), cannot add more" % constants.MAX_NICS,
13267 errors.ECODE_STATE)
13269 # Verify disk changes (operating on a copy)
13270 disks = instance.disks[:]
13271 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13272 if len(disks) > constants.MAX_DISKS:
13273 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13274 " more" % constants.MAX_DISKS,
13275 errors.ECODE_STATE)
13276 disk_sizes = [disk.size for disk in instance.disks]
13277 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13278 self.diskmod if op == constants.DDM_ADD)
13279 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13280 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13282 if self.op.offline is not None:
13283 if self.op.offline:
13284 msg = "can't change to offline"
13286 msg = "can't change to online"
13287 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13289 # Pre-compute NIC changes (necessary to use result in hooks)
13290 self._nic_chgdesc = []
13291 if self.nicmod:
13292 # Operate on copies as this is still in prereq
13293 nics = [nic.Copy() for nic in instance.nics]
13294 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13295 self._CreateNewNic, self._ApplyNicMods, None)
13296 self._new_nics = nics
13297 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13298 else:
13299 self._new_nics = None
13300 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13302 if not self.op.ignore_ipolicy:
13303 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13304 group_info)
13306 # Fill ispec with backend parameters
13307 ispec[constants.ISPEC_SPINDLE_USE] = \
13308 self.be_new.get(constants.BE_SPINDLE_USE, None)
13309 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13310 None)
13312 # Copy ispec to verify parameters with min/max values separately
13313 ispec_max = ispec.copy()
13314 ispec_max[constants.ISPEC_MEM_SIZE] = \
13315 self.be_new.get(constants.BE_MAXMEM, None)
13316 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13317 ispec_min = ispec.copy()
13318 ispec_min[constants.ISPEC_MEM_SIZE] = \
13319 self.be_new.get(constants.BE_MINMEM, None)
13320 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13322 if (res_max or res_min):
13323 # FIXME: Improve error message by including information about whether
13324 # the upper or lower limit of the parameter fails the ipolicy.
13325 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13326 (group_info, group_info.name,
13327 utils.CommaJoin(set(res_max + res_min))))
13328 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
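# Illustrative sketch (not part of the original module): the "ispec"
# dictionary built in CheckPrereq and checked against the group ipolicy has
# roughly this shape (hypothetical values):
#
#   {
#     constants.ISPEC_MEM_SIZE: 2048,    # set separately from MINMEM/MAXMEM
#     constants.ISPEC_CPU_COUNT: 2,
#     constants.ISPEC_DISK_COUNT: 2,
#     constants.ISPEC_DISK_SIZE: [10240, 20480],
#     constants.ISPEC_NIC_COUNT: 1,
#     constants.ISPEC_SPINDLE_USE: 1,
#   }
#
# _ComputeIPolicyInstanceSpecViolation() returns a list of human-readable
# violation messages; an empty list means the spec fits the policy.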
13330 def _ConvertPlainToDrbd(self, feedback_fn):
13331 """Converts an instance from plain to drbd.
13334 feedback_fn("Converting template to drbd")
13335 instance = self.instance
13336 pnode = instance.primary_node
13337 snode = self.op.remote_node
13339 assert instance.disk_template == constants.DT_PLAIN
13341 # create a fake disk info for _GenerateDiskTemplate
13342 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13343 constants.IDISK_VG: d.logical_id[0]}
13344 for d in instance.disks]
13345 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13346 instance.name, pnode, [snode],
13347 disk_info, None, None, 0, feedback_fn,
13348 self.diskparams)
13349 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13350 self.diskparams)
13351 info = _GetInstanceInfoText(instance)
13352 feedback_fn("Creating additional volumes...")
13353 # first, create the missing data and meta devices
13354 for disk in anno_disks:
13355 # unfortunately this is... not too nice
13356 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13357 info, True)
13358 for child in disk.children:
13359 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13360 # at this stage, all new LVs have been created, we can rename the
13361 # old ones
13362 feedback_fn("Renaming original volumes...")
13363 rename_list = [(o, n.children[0].logical_id)
13364 for (o, n) in zip(instance.disks, new_disks)]
13365 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13366 result.Raise("Failed to rename original LVs")
13368 feedback_fn("Initializing DRBD devices...")
13369 # all child devices are in place, we can now create the DRBD devices
13370 for disk in anno_disks:
13371 for node in [pnode, snode]:
13372 f_create = node == pnode
13373 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13375 # at this point, the instance has been modified
13376 instance.disk_template = constants.DT_DRBD8
13377 instance.disks = new_disks
13378 self.cfg.Update(instance, feedback_fn)
13380 # Release node locks while waiting for sync
13381 _ReleaseLocks(self, locking.LEVEL_NODE)
13383 # disks are created, waiting for sync
13384 disk_abort = not _WaitForSync(self, instance,
13385 oneshot=not self.op.wait_for_sync)
13386 if disk_abort:
13387 raise errors.OpExecError("There are some degraded disks for"
13388 " this instance, please cleanup manually")
13390 # Node resource locks will be released by caller
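# Illustrative note (not part of the original module): both conversions rely
# on the DRBD8 disk layout produced by _GenerateDiskTemplate, where each
# DRBD disk has two LV children, the data volume first and the metadata
# volume second:
#
#   drbd8 disk
#     children[0] -> data LV (renamed from / demoted to the plain LV)
#     children[1] -> metadata LV
#
# Hence plain->drbd creates the metadata LV on the primary, both LVs on the
# new secondary, renames the original LVs to the data-child logical IDs and
# only then assembles the DRBD devices; drbd->plain keeps children[0] and
# removes the rest.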
13392 def _ConvertDrbdToPlain(self, feedback_fn):
13393 """Converts an instance from drbd to plain.
13396 instance = self.instance
13398 assert len(instance.secondary_nodes) == 1
13399 assert instance.disk_template == constants.DT_DRBD8
13401 pnode = instance.primary_node
13402 snode = instance.secondary_nodes[0]
13403 feedback_fn("Converting template to plain")
13405 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13406 new_disks = [d.children[0] for d in instance.disks]
13408 # copy over size and mode
13409 for parent, child in zip(old_disks, new_disks):
13410 child.size = parent.size
13411 child.mode = parent.mode
13413 # this is a DRBD disk, return its port to the pool
13414 # NOTE: this must be done right before the call to cfg.Update!
13415 for disk in old_disks:
13416 tcp_port = disk.logical_id[2]
13417 self.cfg.AddTcpUdpPort(tcp_port)
13419 # update instance structure
13420 instance.disks = new_disks
13421 instance.disk_template = constants.DT_PLAIN
13422 self.cfg.Update(instance, feedback_fn)
13424 # Release locks in case removing disks takes a while
13425 _ReleaseLocks(self, locking.LEVEL_NODE)
13427 feedback_fn("Removing volumes on the secondary node...")
13428 for disk in old_disks:
13429 self.cfg.SetDiskID(disk, snode)
13430 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13431 if msg:
13432 self.LogWarning("Could not remove block device %s on node %s,"
13433 " continuing anyway: %s", disk.iv_name, snode, msg)
13435 feedback_fn("Removing unneeded volumes on the primary node...")
13436 for idx, disk in enumerate(old_disks):
13437 meta = disk.children[1]
13438 self.cfg.SetDiskID(meta, pnode)
13439 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13440 if msg:
13441 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13442 " continuing anyway: %s", idx, pnode, msg)
13444 def _CreateNewDisk(self, idx, params, _):
13445 """Creates a new disk.
13448 instance = self.instance
13451 if instance.disk_template in constants.DTS_FILEBASED:
13452 (file_driver, file_path) = instance.disks[0].logical_id
13453 file_path = os.path.dirname(file_path)
13454 else:
13455 file_driver = file_path = None
13457 disk = \
13458 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13459 instance.primary_node, instance.secondary_nodes,
13460 [params], file_path, file_driver, idx,
13461 self.Log, self.diskparams)[0]
13463 info = _GetInstanceInfoText(instance)
13465 logging.info("Creating volume %s for instance %s",
13466 disk.iv_name, instance.name)
13467 # Note: this needs to be kept in sync with _CreateDisks
13469 for node in instance.all_nodes:
13470 f_create = (node == instance.primary_node)
13471 try:
13472 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13473 except errors.OpExecError, err:
13474 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13475 disk.iv_name, disk, node, err)
13477 return (disk, [
13478 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13479 ])
13481 @staticmethod
13482 def _ModifyDisk(idx, disk, params, _):
13483 """Modifies a disk.
13485 """
13486 disk.mode = params[constants.IDISK_MODE]
13488 return [
13489 ("disk.mode/%d" % idx, disk.mode),
13490 ]
13492 def _RemoveDisk(self, idx, root, _):
13493 """Removes a disk.
13495 """
13496 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13497 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13498 self.cfg.SetDiskID(disk, node)
13499 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13500 if msg:
13501 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13502 " continuing anyway", idx, node, msg)
13504 # if this is a DRBD disk, return its port to the pool
13505 if root.dev_type in constants.LDS_DRBD:
13506 self.cfg.AddTcpUdpPort(root.logical_id[2])
13508 @staticmethod
13509 def _CreateNewNic(idx, params, private):
13510 """Creates data structure for a new network interface.
13513 mac = params[constants.INIC_MAC]
13514 ip = params.get(constants.INIC_IP, None)
13515 net = params.get(constants.INIC_NETWORK, None)
13516 #TODO: not private.filled?? can a nic have no nicparams??
13517 nicparams = private.filled
13519 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13520 ("nic.%d" % idx,
13521 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13522 (mac, ip, private.filled[constants.NIC_MODE],
13523 private.filled[constants.NIC_LINK],
13524 net)),
13525 ])
13527 @staticmethod
13528 def _ApplyNicMods(idx, nic, params, private):
13529 """Modifies a network interface.
13531 """
13532 changes = []
13534 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13535 if key in params:
13536 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13537 setattr(nic, key, params[key])
13539 if private.filled:
13540 nic.nicparams = private.filled
13542 for (key, val) in nic.nicparams.items():
13543 changes.append(("nic.%s/%d" % (key, idx), val))
13545 return changes
13547 def Exec(self, feedback_fn):
13548 """Modifies an instance.
13550 All parameters take effect only at the next restart of the instance.
13553 # Process here the warnings from CheckPrereq, as we don't have a
13554 # feedback_fn there.
13555 # TODO: Replace with self.LogWarning
13556 for warn in self.warn:
13557 feedback_fn("WARNING: %s" % warn)
13559 assert ((self.op.disk_template is None) ^
13560 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13561 "Not owning any node resource locks"
13564 instance = self.instance
13567 if self.op.runtime_mem:
13568 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13569 instance,
13570 self.op.runtime_mem)
13571 rpcres.Raise("Cannot modify instance runtime memory")
13572 result.append(("runtime_memory", self.op.runtime_mem))
13574 # Apply disk changes
13575 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13576 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13577 _UpdateIvNames(0, instance.disks)
13579 if self.op.disk_template:
13580 if __debug__:
13581 check_nodes = set(instance.all_nodes)
13582 if self.op.remote_node:
13583 check_nodes.add(self.op.remote_node)
13584 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13585 owned = self.owned_locks(level)
13586 assert not (check_nodes - owned), \
13587 ("Not owning the correct locks, owning %r, expected at least %r" %
13588 (owned, check_nodes))
13590 r_shut = _ShutdownInstanceDisks(self, instance)
13591 if not r_shut:
13592 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13593 " proceed with disk template conversion")
13594 mode = (instance.disk_template, self.op.disk_template)
13595 try:
13596 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13597 except:
13598 self.cfg.ReleaseDRBDMinors(instance.name)
13599 raise
13600 result.append(("disk_template", self.op.disk_template))
13602 assert instance.disk_template == self.op.disk_template, \
13603 ("Expected disk template '%s', found '%s'" %
13604 (self.op.disk_template, instance.disk_template))
13606 # Release node and resource locks if there are any (they might already have
13607 # been released during disk conversion)
13608 _ReleaseLocks(self, locking.LEVEL_NODE)
13609 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13611 # Apply NIC changes
13612 if self._new_nics is not None:
13613 instance.nics = self._new_nics
13614 result.extend(self._nic_chgdesc)
13617 if self.op.hvparams:
13618 instance.hvparams = self.hv_inst
13619 for key, val in self.op.hvparams.iteritems():
13620 result.append(("hv/%s" % key, val))
13623 if self.op.beparams:
13624 instance.beparams = self.be_inst
13625 for key, val in self.op.beparams.iteritems():
13626 result.append(("be/%s" % key, val))
13629 if self.op.os_name:
13630 instance.os = self.op.os_name
13633 if self.op.osparams:
13634 instance.osparams = self.os_inst
13635 for key, val in self.op.osparams.iteritems():
13636 result.append(("os/%s" % key, val))
13638 if self.op.offline is None:
13639 # Ignore
13640 pass
13641 elif self.op.offline:
13642 # Mark instance as offline
13643 self.cfg.MarkInstanceOffline(instance.name)
13644 result.append(("admin_state", constants.ADMINST_OFFLINE))
13645 else:
13646 # Mark instance as online, but stopped
13647 self.cfg.MarkInstanceDown(instance.name)
13648 result.append(("admin_state", constants.ADMINST_DOWN))
13650 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13652 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13653 self.owned_locks(locking.LEVEL_NODE)), \
13654 "All node locks should have been released by now"
13658 _DISK_CONVERSIONS = {
13659 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13660 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13661 }
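# Illustrative sketch (not part of the original module): _DISK_CONVERSIONS
# is a dispatch table keyed by (old_template, new_template); Exec() looks up
# the pair and calls the handler explicitly with the LU instance, roughly
# like this hypothetical helper:
def _SketchRunDiskConversion(lu, feedback_fn):
  mode = (lu.instance.disk_template, lu.op.disk_template)
  # A missing key means the conversion is unsupported (CheckPrereq rejects it)
  handler = lu._DISK_CONVERSIONS[mode]
  handler(lu, feedback_fn)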
13664 class LUInstanceChangeGroup(LogicalUnit):
13665 HPATH = "instance-change-group"
13666 HTYPE = constants.HTYPE_INSTANCE
13667 REQ_BGL = False
13669 def ExpandNames(self):
13670 self.share_locks = _ShareAll()
13671 self.needed_locks = {
13672 locking.LEVEL_NODEGROUP: [],
13673 locking.LEVEL_NODE: [],
13674 }
13676 self._ExpandAndLockInstance()
13678 if self.op.target_groups:
13679 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13680 self.op.target_groups)
13681 else:
13682 self.req_target_uuids = None
13684 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13686 def DeclareLocks(self, level):
13687 if level == locking.LEVEL_NODEGROUP:
13688 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13690 if self.req_target_uuids:
13691 lock_groups = set(self.req_target_uuids)
13693 # Lock all groups used by instance optimistically; this requires going
13694 # via the node before it's locked, requiring verification later on
13695 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13696 lock_groups.update(instance_groups)
13697 else:
13698 # No target groups, need to lock all of them
13699 lock_groups = locking.ALL_SET
13701 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13703 elif level == locking.LEVEL_NODE:
13704 if self.req_target_uuids:
13705 # Lock all nodes used by instances
13706 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13707 self._LockInstancesNodes()
13709 # Lock all nodes in all potential target groups
13710 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13711 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13712 member_nodes = [node_name
13713 for group in lock_groups
13714 for node_name in self.cfg.GetNodeGroup(group).members]
13715 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13716 else:
13717 # Lock all nodes as all groups are potential targets
13718 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13720 def CheckPrereq(self):
13721 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13722 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13723 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13725 assert (self.req_target_uuids is None or
13726 owned_groups.issuperset(self.req_target_uuids))
13727 assert owned_instances == set([self.op.instance_name])
13729 # Get instance information
13730 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13732 # Check if node groups for locked instance are still correct
13733 assert owned_nodes.issuperset(self.instance.all_nodes), \
13734 ("Instance %s's nodes changed while we kept the lock" %
13735 self.op.instance_name)
13737 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13738 owned_groups)
13740 if self.req_target_uuids:
13741 # User requested specific target groups
13742 self.target_uuids = frozenset(self.req_target_uuids)
13743 else:
13744 # All groups except those used by the instance are potential targets
13745 self.target_uuids = owned_groups - inst_groups
13747 conflicting_groups = self.target_uuids & inst_groups
13748 if conflicting_groups:
13749 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13750 " used by the instance '%s'" %
13751 (utils.CommaJoin(conflicting_groups),
13752 self.op.instance_name),
13753 errors.ECODE_INVAL)
13755 if not self.target_uuids:
13756 raise errors.OpPrereqError("There are no possible target groups",
13757 errors.ECODE_INVAL)
13759 def BuildHooksEnv(self):
13760 """Build hooks env.
13763 assert self.target_uuids
13766 "TARGET_GROUPS": " ".join(self.target_uuids),
13769 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13773 def BuildHooksNodes(self):
13774 """Build hooks nodes.
13777 mn = self.cfg.GetMasterNode()
13778 return ([mn], [mn])
13780 def Exec(self, feedback_fn):
13781 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13783 assert instances == [self.op.instance_name], "Instance not locked"
13785 req = iallocator.IAReqGroupChange(instances=instances,
13786 target_groups=list(self.target_uuids))
13787 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13789 ial.Run(self.op.iallocator)
13791 if not ial.success:
13792 raise errors.OpPrereqError("Can't compute solution for changing group of"
13793 " instance '%s' using iallocator '%s': %s" %
13794 (self.op.instance_name, self.op.iallocator,
13795 ial.info), errors.ECODE_NORES)
13797 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13799 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13800 " instance '%s'", len(jobs), self.op.instance_name)
13802 return ResultWithJobs(jobs)
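# Illustrative note (not part of the original module): _LoadNodeEvacResult
# turns the iallocator result into a list of job definitions (each a list of
# opcodes), and ResultWithJobs makes the master processor submit them, e.g.
# (hypothetical shape):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     remote_node="node3.example.com")],
#     ]
#   return ResultWithJobs(jobs)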
13805 class LUBackupQuery(NoHooksLU):
13806 """Query the exports list
13811 def CheckArguments(self):
13812 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13813 ["node", "export"], self.op.use_locking)
13815 def ExpandNames(self):
13816 self.expq.ExpandNames(self)
13818 def DeclareLocks(self, level):
13819 self.expq.DeclareLocks(self, level)
13821 def Exec(self, feedback_fn):
13822 result = {}
13824 for (node, expname) in self.expq.OldStyleQuery(self):
13825 if expname is None:
13826 result[node] = False
13827 else:
13828 result.setdefault(node, []).append(expname)
13830 return result
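# Illustrative note (not part of the original module): the old-style result
# of LUBackupQuery maps each queried node either to False (the node could
# not be contacted) or to the list of export names it holds, e.g.
# (hypothetical values):
#
#   {
#     "node1.example.com": ["inst1.example.com", "inst2.example.com"],
#     "node2.example.com": False,
#   }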
13833 class _ExportQuery(_QueryBase):
13834 FIELDS = query.EXPORT_FIELDS
13836 #: The node name is not a unique key for this query
13837 SORT_FIELD = "node"
13839 def ExpandNames(self, lu):
13840 lu.needed_locks = {}
13842 # The following variables interact with _QueryBase._GetNames
13843 if self.names:
13844 self.wanted = _GetWantedNodes(lu, self.names)
13845 else:
13846 self.wanted = locking.ALL_SET
13848 self.do_locking = self.use_locking
13850 if self.do_locking:
13851 lu.share_locks = _ShareAll()
13852 lu.needed_locks = {
13853 locking.LEVEL_NODE: self.wanted,
13854 }
13856 def DeclareLocks(self, lu, level):
13857 pass
13859 def _GetQueryData(self, lu):
13860 """Computes the list of nodes and their attributes.
13863 # Locking is not used
13865 assert not (compat.any(lu.glm.is_owned(level)
13866 for level in locking.LEVELS
13867 if level != locking.LEVEL_CLUSTER) or
13868 self.do_locking or self.use_locking)
13870 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13872 result = []
13874 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13875 if nres.fail_msg:
13876 result.append((node, None))
13877 else:
13878 result.extend((node, expname) for expname in nres.payload)
13880 return result
13883 class LUBackupPrepare(NoHooksLU):
13884 """Prepares an instance for an export and returns useful information.
13889 def ExpandNames(self):
13890 self._ExpandAndLockInstance()
13892 def CheckPrereq(self):
13893 """Check prerequisites.
13896 instance_name = self.op.instance_name
13898 self.instance = self.cfg.GetInstanceInfo(instance_name)
13899 assert self.instance is not None, \
13900 "Cannot retrieve locked instance %s" % self.op.instance_name
13901 _CheckNodeOnline(self, self.instance.primary_node)
13903 self._cds = _GetClusterDomainSecret()
13905 def Exec(self, feedback_fn):
13906 """Prepares an instance for an export.
13909 instance = self.instance
13911 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13912 salt = utils.GenerateSecret(8)
13914 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13915 result = self.rpc.call_x509_cert_create(instance.primary_node,
13916 constants.RIE_CERT_VALIDITY)
13917 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13919 (name, cert_pem) = result.payload
13921 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13922 cert_pem)
13924 return {
13925 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13926 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13927 salt),
13928 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13929 }
13931 return None
13934 class LUBackupExport(LogicalUnit):
13935 """Export an instance to an image in the cluster.
13938 HPATH = "instance-export"
13939 HTYPE = constants.HTYPE_INSTANCE
13942 def CheckArguments(self):
13943 """Check the arguments.
13946 self.x509_key_name = self.op.x509_key_name
13947 self.dest_x509_ca_pem = self.op.destination_x509_ca
13949 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13950 if not self.x509_key_name:
13951 raise errors.OpPrereqError("Missing X509 key name for encryption",
13952 errors.ECODE_INVAL)
13954 if not self.dest_x509_ca_pem:
13955 raise errors.OpPrereqError("Missing destination X509 CA",
13956 errors.ECODE_INVAL)
13958 def ExpandNames(self):
13959 self._ExpandAndLockInstance()
13961 # Lock all nodes for local exports
13962 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13963 # FIXME: lock only instance primary and destination node
13965 # Sad but true, for now we have do lock all nodes, as we don't know where
13966 # the previous export might be, and in this LU we search for it and
13967 # remove it from its current node. In the future we could fix this by:
13968 # - making a tasklet to search (share-lock all), then create the
13969 # new one, then one to remove, after
13970 # - removing the removal operation altogether
13971 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13973 def DeclareLocks(self, level):
13974 """Last minute lock declaration."""
13975 # All nodes are locked anyway, so nothing to do here.
13977 def BuildHooksEnv(self):
13978 """Build hooks env.
13980 This will run on the master, primary node and target node.
13982 """
13983 env = {
13984 "EXPORT_MODE": self.op.mode,
13985 "EXPORT_NODE": self.op.target_node,
13986 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13987 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13988 # TODO: Generic function for boolean env variables
13989 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13992 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13996 def BuildHooksNodes(self):
13997 """Build hooks nodes.
14000 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14002 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14003 nl.append(self.op.target_node)
14005 return (nl, nl)
14007 def CheckPrereq(self):
14008 """Check prerequisites.
14010 This checks that the instance and node names are valid.
14013 instance_name = self.op.instance_name
14015 self.instance = self.cfg.GetInstanceInfo(instance_name)
14016 assert self.instance is not None, \
14017 "Cannot retrieve locked instance %s" % self.op.instance_name
14018 _CheckNodeOnline(self, self.instance.primary_node)
14020 if (self.op.remove_instance and
14021 self.instance.admin_state == constants.ADMINST_UP and
14022 not self.op.shutdown):
14023 raise errors.OpPrereqError("Can not remove instance without shutting it"
14024 " down before", errors.ECODE_STATE)
14026 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14027 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14028 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14029 assert self.dst_node is not None
14031 _CheckNodeOnline(self, self.dst_node.name)
14032 _CheckNodeNotDrained(self, self.dst_node.name)
14034 self._cds = None
14035 self.dest_disk_info = None
14036 self.dest_x509_ca = None
14038 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14039 self.dst_node = None
14041 if len(self.op.target_node) != len(self.instance.disks):
14042 raise errors.OpPrereqError(("Received destination information for %s"
14043 " disks, but instance %s has %s disks") %
14044 (len(self.op.target_node), instance_name,
14045 len(self.instance.disks)),
14046 errors.ECODE_INVAL)
14048 cds = _GetClusterDomainSecret()
14050 # Check X509 key name
14051 try:
14052 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14053 except (TypeError, ValueError), err:
14054 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14055 errors.ECODE_INVAL)
14057 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14058 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14059 errors.ECODE_INVAL)
14061 # Load and verify CA
14062 try:
14063 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14064 except OpenSSL.crypto.Error, err:
14065 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14066 (err, ), errors.ECODE_INVAL)
14068 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14069 if errcode is not None:
14070 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14071 (msg, ), errors.ECODE_INVAL)
14073 self.dest_x509_ca = cert
14075 # Verify target information
14076 disk_info = []
14077 for idx, disk_data in enumerate(self.op.target_node):
14078 try:
14079 (host, port, magic) = \
14080 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14081 except errors.GenericError, err:
14082 raise errors.OpPrereqError("Target info for disk %s: %s" %
14083 (idx, err), errors.ECODE_INVAL)
14085 disk_info.append((host, port, magic))
14087 assert len(disk_info) == len(self.op.target_node)
14088 self.dest_disk_info = disk_info
14090 else:
14091 raise errors.ProgrammerError("Unhandled export mode %r" %
14092 self.op.mode)
14094 # instance disk type verification
14095 # TODO: Implement export support for file-based disks
14096 for disk in self.instance.disks:
14097 if disk.dev_type == constants.LD_FILE:
14098 raise errors.OpPrereqError("Export not supported for instances with"
14099 " file-based disks", errors.ECODE_INVAL)
14101 def _CleanupExports(self, feedback_fn):
14102 """Removes exports of current instance from all other nodes.
14104 If an instance in a cluster with nodes A..D was exported to node C, its
14105 exports will be removed from the nodes A, B and D.
14108 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14110 nodelist = self.cfg.GetNodeList()
14111 nodelist.remove(self.dst_node.name)
14113 # on one-node clusters nodelist will be empty after the removal
14114 # if we proceed the backup would be removed because OpBackupQuery
14115 # substitutes an empty list with the full cluster node list.
14116 iname = self.instance.name
14118 feedback_fn("Removing old exports for instance %s" % iname)
14119 exportlist = self.rpc.call_export_list(nodelist)
14120 for node in exportlist:
14121 if exportlist[node].fail_msg:
14122 continue
14123 if iname in exportlist[node].payload:
14124 msg = self.rpc.call_export_remove(node, iname).fail_msg
14125 if msg:
14126 self.LogWarning("Could not remove older export for instance %s"
14127 " on node %s: %s", iname, node, msg)
14129 def Exec(self, feedback_fn):
14130 """Export an instance to an image in the cluster.
14133 assert self.op.mode in constants.EXPORT_MODES
14135 instance = self.instance
14136 src_node = instance.primary_node
14138 if self.op.shutdown:
14139 # shutdown the instance, but not the disks
14140 feedback_fn("Shutting down instance %s" % instance.name)
14141 result = self.rpc.call_instance_shutdown(src_node, instance,
14142 self.op.shutdown_timeout)
14143 # TODO: Maybe ignore failures if ignore_remove_failures is set
14144 result.Raise("Could not shutdown instance %s on"
14145 " node %s" % (instance.name, src_node))
14147 # set the disks ID correctly since call_instance_start needs the
14148 # correct drbd minor to create the symlinks
14149 for disk in instance.disks:
14150 self.cfg.SetDiskID(disk, src_node)
14152 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14154 if activate_disks:
14155 # Activate the instance disks if we're exporting a stopped instance
14156 feedback_fn("Activating disks for %s" % instance.name)
14157 _StartInstanceDisks(self, instance, None)
14159 try:
14160 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14161 instance)
14163 helper.CreateSnapshots()
14164 try:
14165 if (self.op.shutdown and
14166 instance.admin_state == constants.ADMINST_UP and
14167 not self.op.remove_instance):
14168 assert not activate_disks
14169 feedback_fn("Starting instance %s" % instance.name)
14170 result = self.rpc.call_instance_start(src_node,
14171 (instance, None, None), False)
14172 msg = result.fail_msg
14173 if msg:
14174 feedback_fn("Failed to start instance: %s" % msg)
14175 _ShutdownInstanceDisks(self, instance)
14176 raise errors.OpExecError("Could not start instance: %s" % msg)
14178 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14179 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14180 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14181 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14182 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14184 (key_name, _, _) = self.x509_key_name
14186 dest_ca_pem = \
14187 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14188 self.dest_x509_ca)
14190 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14191 key_name, dest_ca_pem,
14192 timeouts)
14194 finally:
14195 helper.Cleanup()
14196 # Check for backwards compatibility
14197 assert len(dresults) == len(instance.disks)
14198 assert compat.all(isinstance(i, bool) for i in dresults), \
14199 "Not all results are boolean: %r" % dresults
14203 feedback_fn("Deactivating disks for %s" % instance.name)
14204 _ShutdownInstanceDisks(self, instance)
14206 if not (compat.all(dresults) and fin_resu):
14207 failures = []
14208 if not fin_resu:
14209 failures.append("export finalization")
14210 if not compat.all(dresults):
14211 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14212 if not dsk)
14213 failures.append("disk export: disk(s) %s" % fdsk)
14215 raise errors.OpExecError("Export failed, errors in %s" %
14216 utils.CommaJoin(failures))
14218 # At this point, the export was successful, we can cleanup/finish
14220 # Remove instance if requested
14221 if self.op.remove_instance:
14222 feedback_fn("Removing instance %s" % instance.name)
14223 _RemoveInstance(self, feedback_fn, instance,
14224 self.op.ignore_remove_failures)
14226 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14227 self._CleanupExports(feedback_fn)
14229 return fin_resu, dresults
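# Illustrative note (not part of the original module): LUBackupExport.Exec
# returns a (fin_resu, dresults) pair: the overall finalization status and
# one boolean per instance disk. For a two-disk instance a fully successful
# export would return (True, [True, True]); any False entry (or a false
# fin_resu) makes Exec() raise OpExecError before returning.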
14232 class LUBackupRemove(NoHooksLU):
14233 """Remove exports related to the named instance.
14238 def ExpandNames(self):
14239 self.needed_locks = {}
14240 # We need all nodes to be locked in order for RemoveExport to work, but we
14241 # don't need to lock the instance itself, as nothing will happen to it (and
14242 # we can remove exports also for a removed instance)
14243 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14245 def Exec(self, feedback_fn):
14246 """Remove any export.
14249 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14250 # If the instance was not found we'll try with the name that was passed in.
14251 # This will only work if it was an FQDN, though.
14253 if not instance_name:
14255 instance_name = self.op.instance_name
14257 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14258 exportlist = self.rpc.call_export_list(locked_nodes)
14259 found = False
14260 for node in exportlist:
14261 msg = exportlist[node].fail_msg
14262 if msg:
14263 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14264 continue
14265 if instance_name in exportlist[node].payload:
14266 found = True
14267 result = self.rpc.call_export_remove(node, instance_name)
14268 msg = result.fail_msg
14269 if msg:
14270 logging.error("Could not remove export for instance %s"
14271 " on node %s: %s", instance_name, node, msg)
14273 if fqdn_warn and not found:
14274 feedback_fn("Export not found. If trying to remove an export belonging"
14275 " to a deleted instance please use its Fully Qualified"
14279 class LUGroupAdd(LogicalUnit):
14280 """Logical unit for creating node groups.
14283 HPATH = "group-add"
14284 HTYPE = constants.HTYPE_GROUP
14285 REQ_BGL = False
14287 def ExpandNames(self):
14288 # We need the new group's UUID here so that we can create and acquire the
14289 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14290 # that it should not check whether the UUID exists in the configuration.
14291 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14292 self.needed_locks = {}
14293 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14295 def CheckPrereq(self):
14296 """Check prerequisites.
14298 This checks that the given group name is not an existing node group
14299 already.
14301 """
14302 try:
14303 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14304 except errors.OpPrereqError:
14305 pass
14306 else:
14307 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14308 " node group (UUID: %s)" %
14309 (self.op.group_name, existing_uuid),
14310 errors.ECODE_EXISTS)
14312 if self.op.ndparams:
14313 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14315 if self.op.hv_state:
14316 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14317 else:
14318 self.new_hv_state = None
14320 if self.op.disk_state:
14321 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14322 else:
14323 self.new_disk_state = None
14325 if self.op.diskparams:
14326 for templ in constants.DISK_TEMPLATES:
14327 if templ in self.op.diskparams:
14328 utils.ForceDictType(self.op.diskparams[templ],
14329 constants.DISK_DT_TYPES)
14330 self.new_diskparams = self.op.diskparams
14331 try:
14332 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14333 except errors.OpPrereqError, err:
14334 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14335 errors.ECODE_INVAL)
14336 else:
14337 self.new_diskparams = {}
14339 if self.op.ipolicy:
14340 cluster = self.cfg.GetClusterInfo()
14341 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14342 try:
14343 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14344 except errors.ConfigurationError, err:
14345 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14346 errors.ECODE_INVAL)
14348 def BuildHooksEnv(self):
14349 """Build hooks env.
14353 "GROUP_NAME": self.op.group_name,
14356 def BuildHooksNodes(self):
14357 """Build hooks nodes.
14360 mn = self.cfg.GetMasterNode()
14361 return ([mn], [mn])
14363 def Exec(self, feedback_fn):
14364 """Add the node group to the cluster.
14367 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14368 uuid=self.group_uuid,
14369 alloc_policy=self.op.alloc_policy,
14370 ndparams=self.op.ndparams,
14371 diskparams=self.new_diskparams,
14372 ipolicy=self.op.ipolicy,
14373 hv_state_static=self.new_hv_state,
14374 disk_state_static=self.new_disk_state)
14376 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14377 del self.remove_locks[locking.LEVEL_NODEGROUP]
14380 class LUGroupAssignNodes(NoHooksLU):
14381 """Logical unit for assigning nodes to groups.
14386 def ExpandNames(self):
14387 # These raise errors.OpPrereqError on their own:
14388 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14389 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14391 # We want to lock all the affected nodes and groups. We have readily
14392 # available the list of nodes, and the *destination* group. To gather the
14393 # list of "source" groups, we need to fetch node information later on.
14394 self.needed_locks = {
14395 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14396 locking.LEVEL_NODE: self.op.nodes,
14397 }
14399 def DeclareLocks(self, level):
14400 if level == locking.LEVEL_NODEGROUP:
14401 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14403 # Try to get all affected nodes' groups without having the group or node
14404 # lock yet. Needs verification later in the code flow.
14405 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14407 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14409 def CheckPrereq(self):
14410 """Check prerequisites.
14413 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14414 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14415 frozenset(self.op.nodes))
14417 expected_locks = (set([self.group_uuid]) |
14418 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14419 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14420 if actual_locks != expected_locks:
14421 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14422 " current groups are '%s', used to be '%s'" %
14423 (utils.CommaJoin(expected_locks),
14424 utils.CommaJoin(actual_locks)))
14426 self.node_data = self.cfg.GetAllNodesInfo()
14427 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14428 instance_data = self.cfg.GetAllInstancesInfo()
14430 if self.group is None:
14431 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14432 (self.op.group_name, self.group_uuid))
14434 (new_splits, previous_splits) = \
14435 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14436 for node in self.op.nodes],
14437 self.node_data, instance_data)
14439 if new_splits:
14440 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14442 if not self.op.force:
14443 raise errors.OpExecError("The following instances get split by this"
14444 " change and --force was not given: %s" %
14445 fmt_new_splits)
14446 else:
14447 self.LogWarning("This operation will split the following instances: %s",
14448 fmt_new_splits)
14450 if previous_splits:
14451 self.LogWarning("In addition, these already-split instances continue"
14452 " to be split across groups: %s",
14453 utils.CommaJoin(utils.NiceSort(previous_splits)))
14455 def Exec(self, feedback_fn):
14456 """Assign nodes to a new group.
14459 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14461 self.cfg.AssignGroupNodes(mods)
14463 @staticmethod
14464 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14465 """Check for split instances after a node assignment.
14467 This method considers a series of node assignments as an atomic operation,
14468 and returns information about split instances after applying the set of
14469 changes.
14471 In particular, it returns information about newly split instances, and
14472 instances that were already split, and remain so after the change.
14474 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14475 considered.
14477 @type changes: list of (node_name, new_group_uuid) pairs.
14478 @param changes: list of node assignments to consider.
14479 @param node_data: a dict with data for all nodes
14480 @param instance_data: a dict with all instances to consider
14481 @rtype: a two-tuple
14482 @return: a list of instances that were previously okay and result split as a
14483 consequence of this change, and a list of instances that were previously
14484 split and this change does not fix.
14486 """
14487 changed_nodes = dict((node, group) for node, group in changes
14488 if node_data[node].group != group)
14490 all_split_instances = set()
14491 previously_split_instances = set()
14493 def InstanceNodes(instance):
14494 return [instance.primary_node] + list(instance.secondary_nodes)
14496 for inst in instance_data.values():
14497 if inst.disk_template not in constants.DTS_INT_MIRROR:
14498 continue
14500 instance_nodes = InstanceNodes(inst)
14502 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14503 previously_split_instances.add(inst.name)
14505 if len(set(changed_nodes.get(node, node_data[node].group)
14506 for node in instance_nodes)) > 1:
14507 all_split_instances.add(inst.name)
14509 return (list(all_split_instances - previously_split_instances),
14510 list(previously_split_instances & all_split_instances))
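# Illustrative sketch (editor's addition, hypothetical names): with nodes
# "n1"/"n2" in group "g1" and "n3" in group "g2", a DRBD instance on
# ("n1", "n2") ends up in the "newly split" list for
# changes=[("n2", "g2")], while an instance already spanning ("n1", "n3")
# is reported in the "previously split" list because the change does not
# reunite its nodes.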
14513 class _GroupQuery(_QueryBase):
14514 FIELDS = query.GROUP_FIELDS
14516 def ExpandNames(self, lu):
14517 lu.needed_locks = {}
14519 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14520 self._cluster = lu.cfg.GetClusterInfo()
14521 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14523 if not self.names:
14524 self.wanted = [name_to_uuid[name]
14525 for name in utils.NiceSort(name_to_uuid.keys())]
14526 else:
14527 # Accept names to be either names or UUIDs.
14528 missing = []
14529 self.wanted = []
14530 all_uuid = frozenset(self._all_groups.keys())
14532 for name in self.names:
14533 if name in all_uuid:
14534 self.wanted.append(name)
14535 elif name in name_to_uuid:
14536 self.wanted.append(name_to_uuid[name])
14537 else:
14538 missing.append(name)
14540 if missing:
14541 raise errors.OpPrereqError("Some groups do not exist: %s" %
14542 utils.CommaJoin(missing),
14543 errors.ECODE_NOENT)
14545 def DeclareLocks(self, lu, level):
14548 def _GetQueryData(self, lu):
14549 """Computes the list of node groups and their attributes.
14552 do_nodes = query.GQ_NODE in self.requested_data
14553 do_instances = query.GQ_INST in self.requested_data
14555 group_to_nodes = None
14556 group_to_instances = None
14558 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14559 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14560 # latter GetAllInstancesInfo() is not enough, for we have to go through
14561 # instance->node. Hence, we will need to process nodes even if we only need
14562 # instance information.
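# Illustrative sketch (editor's addition, hypothetical data): if all_nodes
# maps node "n1" to group "g1" and "n2" to group "g2", the loop below
# builds node_to_group = {"n1": "g1", "n2": "g2"}, which is then used to
# bucket each instance under the group of its primary node.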
14563 if do_nodes or do_instances:
14564 all_nodes = lu.cfg.GetAllNodesInfo()
14565 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14566 node_to_group = {}
14568 for node in all_nodes.values():
14569 if node.group in group_to_nodes:
14570 group_to_nodes[node.group].append(node.name)
14571 node_to_group[node.name] = node.group
14573 if do_instances:
14574 all_instances = lu.cfg.GetAllInstancesInfo()
14575 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14577 for instance in all_instances.values():
14578 node = instance.primary_node
14579 if node in node_to_group:
14580 group_to_instances[node_to_group[node]].append(instance.name)
14582 if not do_nodes:
14583 # Do not pass on node information if it was not requested.
14584 group_to_nodes = None
14586 return query.GroupQueryData(self._cluster,
14587 [self._all_groups[uuid]
14588 for uuid in self.wanted],
14589 group_to_nodes, group_to_instances,
14590 query.GQ_DISKPARAMS in self.requested_data)
14593 class LUGroupQuery(NoHooksLU):
14594 """Logical unit for querying node groups.
14599 def CheckArguments(self):
14600 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14601 self.op.output_fields, False)
14603 def ExpandNames(self):
14604 self.gq.ExpandNames(self)
14606 def DeclareLocks(self, level):
14607 self.gq.DeclareLocks(self, level)
14609 def Exec(self, feedback_fn):
14610 return self.gq.OldStyleQuery(self)
14613 class LUGroupSetParams(LogicalUnit):
14614 """Modifies the parameters of a node group.
14617 HPATH = "group-modify"
14618 HTYPE = constants.HTYPE_GROUP
14621 def CheckArguments(self):
14622 all_changes = [
14623 self.op.ndparams,
14624 self.op.diskparams,
14625 self.op.alloc_policy,
14626 self.op.hv_state,
14627 self.op.disk_state,
14628 self.op.ipolicy,
14629 ]
14631 if all_changes.count(None) == len(all_changes):
14632 raise errors.OpPrereqError("Please pass at least one modification",
14633 errors.ECODE_INVAL)
14635 def ExpandNames(self):
14636 # This raises errors.OpPrereqError on its own:
14637 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14639 self.needed_locks = {
14640 locking.LEVEL_INSTANCE: [],
14641 locking.LEVEL_NODEGROUP: [self.group_uuid],
14644 self.share_locks[locking.LEVEL_INSTANCE] = 1
14646 def DeclareLocks(self, level):
14647 if level == locking.LEVEL_INSTANCE:
14648 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14650 # Lock instances optimistically, needs verification once group lock has
14652 self.needed_locks[locking.LEVEL_INSTANCE] = \
14653 self.cfg.GetNodeGroupInstances(self.group_uuid)
14655 @staticmethod
14656 def _UpdateAndVerifyDiskParams(old, new):
14657 """Updates and verifies disk parameters.
14660 new_params = _GetUpdatedParams(old, new)
14661 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14662 return new_params
14664 def CheckPrereq(self):
14665 """Check prerequisites.
14668 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14670 # Check if locked instances are still correct
14671 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14673 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14674 cluster = self.cfg.GetClusterInfo()
14676 if self.group is None:
14677 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14678 (self.op.group_name, self.group_uuid))
14680 if self.op.ndparams:
14681 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14682 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14683 self.new_ndparams = new_ndparams
14685 if self.op.diskparams:
14686 diskparams = self.group.diskparams
14687 uavdp = self._UpdateAndVerifyDiskParams
14688 # For each disk template subdict, update and verify the values
14689 new_diskparams = dict((dt,
14690 uavdp(diskparams.get(dt, {}),
14691 self.op.diskparams[dt]))
14692 for dt in constants.DISK_TEMPLATES
14693 if dt in self.op.diskparams)
14694 # Now that all updated subdicts of diskparams are ready, merge them
14695 # into the actual diskparams dict
14696 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
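# Illustrative sketch (editor's addition, hypothetical values): if the
# group currently has diskparams {"drbd": {"resync-rate": 1024}} and the
# opcode passes {"drbd": {"metavg": "xenvg"}}, the per-template update
# above yields {"drbd": {"resync-rate": 1024, "metavg": "xenvg"}}, while
# FillDict keeps any templates the opcode did not touch (e.g. "plain").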
14697 try:
14698 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14699 except errors.OpPrereqError, err:
14700 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14701 errors.ECODE_INVAL)
14703 if self.op.hv_state:
14704 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14705 self.group.hv_state_static)
14707 if self.op.disk_state:
14708 self.new_disk_state = \
14709 _MergeAndVerifyDiskState(self.op.disk_state,
14710 self.group.disk_state_static)
14712 if self.op.ipolicy:
14713 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14717 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14718 inst_filter = lambda inst: inst.name in owned_instances
14719 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14720 gmi = ganeti.masterd.instance
14722 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14724 new_ipolicy, instances)
14727 self.LogWarning("After the ipolicy change the following instances"
14728 " violate them: %s",
14729 utils.CommaJoin(violations))
14731 def BuildHooksEnv(self):
14732 """Build hooks env.
14736 "GROUP_NAME": self.op.group_name,
14737 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14740 def BuildHooksNodes(self):
14741 """Build hooks nodes.
14744 mn = self.cfg.GetMasterNode()
14745 return ([mn], [mn])
14747 def Exec(self, feedback_fn):
14748 """Modifies the node group.
14753 if self.op.ndparams:
14754 self.group.ndparams = self.new_ndparams
14755 result.append(("ndparams", str(self.group.ndparams)))
14757 if self.op.diskparams:
14758 self.group.diskparams = self.new_diskparams
14759 result.append(("diskparams", str(self.group.diskparams)))
14761 if self.op.alloc_policy:
14762 self.group.alloc_policy = self.op.alloc_policy
14764 if self.op.hv_state:
14765 self.group.hv_state_static = self.new_hv_state
14767 if self.op.disk_state:
14768 self.group.disk_state_static = self.new_disk_state
14770 if self.op.ipolicy:
14771 self.group.ipolicy = self.new_ipolicy
14773 self.cfg.Update(self.group, feedback_fn)
14777 class LUGroupRemove(LogicalUnit):
14778 HPATH = "group-remove"
14779 HTYPE = constants.HTYPE_GROUP
14782 def ExpandNames(self):
14783 # This raises errors.OpPrereqError on its own:
14784 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14785 self.needed_locks = {
14786 locking.LEVEL_NODEGROUP: [self.group_uuid],
14789 def CheckPrereq(self):
14790 """Check prerequisites.
14792 This checks that the given group name exists as a node group, that it is
14793 empty (i.e., contains no nodes), and that it is not the last group of the
14794 cluster.
14797 # Verify that the group is empty.
14798 group_nodes = [node.name
14799 for node in self.cfg.GetAllNodesInfo().values()
14800 if node.group == self.group_uuid]
14803 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14805 (self.op.group_name,
14806 utils.CommaJoin(utils.NiceSort(group_nodes))),
14807 errors.ECODE_STATE)
14809 # Verify the cluster would not be left group-less.
14810 if len(self.cfg.GetNodeGroupList()) == 1:
14811 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14812 " removed" % self.op.group_name,
14813 errors.ECODE_STATE)
14815 def BuildHooksEnv(self):
14816 """Build hooks env.
14820 "GROUP_NAME": self.op.group_name,
14823 def BuildHooksNodes(self):
14824 """Build hooks nodes.
14827 mn = self.cfg.GetMasterNode()
14828 return ([mn], [mn])
14830 def Exec(self, feedback_fn):
14831 """Remove the node group.
14835 self.cfg.RemoveNodeGroup(self.group_uuid)
14836 except errors.ConfigurationError:
14837 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14838 (self.op.group_name, self.group_uuid))
14840 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14843 class LUGroupRename(LogicalUnit):
14844 HPATH = "group-rename"
14845 HTYPE = constants.HTYPE_GROUP
14848 def ExpandNames(self):
14849 # This raises errors.OpPrereqError on its own:
14850 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14852 self.needed_locks = {
14853 locking.LEVEL_NODEGROUP: [self.group_uuid],
14856 def CheckPrereq(self):
14857 """Check prerequisites.
14859 Ensures requested new name is not yet used.
14862 try:
14863 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14864 except errors.OpPrereqError:
14865 pass
14866 else:
14867 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14868 " node group (UUID: %s)" %
14869 (self.op.new_name, new_name_uuid),
14870 errors.ECODE_EXISTS)
14872 def BuildHooksEnv(self):
14873 """Build hooks env.
14877 "OLD_NAME": self.op.group_name,
14878 "NEW_NAME": self.op.new_name,
14881 def BuildHooksNodes(self):
14882 """Build hooks nodes.
14885 mn = self.cfg.GetMasterNode()
14887 all_nodes = self.cfg.GetAllNodesInfo()
14888 all_nodes.pop(mn, None)
14890 run_nodes = [mn]
14891 run_nodes.extend(node.name for node in all_nodes.values()
14892 if node.group == self.group_uuid)
14894 return (run_nodes, run_nodes)
14896 def Exec(self, feedback_fn):
14897 """Rename the node group.
14900 group = self.cfg.GetNodeGroup(self.group_uuid)
14903 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14904 (self.op.group_name, self.group_uuid))
14906 group.name = self.op.new_name
14907 self.cfg.Update(group, feedback_fn)
14909 return self.op.new_name
14912 class LUGroupEvacuate(LogicalUnit):
14913 HPATH = "group-evacuate"
14914 HTYPE = constants.HTYPE_GROUP
14917 def ExpandNames(self):
14918 # This raises errors.OpPrereqError on its own:
14919 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14921 if self.op.target_groups:
14922 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14923 self.op.target_groups)
14925 self.req_target_uuids = []
14927 if self.group_uuid in self.req_target_uuids:
14928 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14929 " as a target group (targets are %s)" %
14931 utils.CommaJoin(self.req_target_uuids)),
14932 errors.ECODE_INVAL)
14934 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14936 self.share_locks = _ShareAll()
14937 self.needed_locks = {
14938 locking.LEVEL_INSTANCE: [],
14939 locking.LEVEL_NODEGROUP: [],
14940 locking.LEVEL_NODE: [],
14943 def DeclareLocks(self, level):
14944 if level == locking.LEVEL_INSTANCE:
14945 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14947 # Lock instances optimistically, needs verification once node and group
14948 # locks have been acquired
14949 self.needed_locks[locking.LEVEL_INSTANCE] = \
14950 self.cfg.GetNodeGroupInstances(self.group_uuid)
14952 elif level == locking.LEVEL_NODEGROUP:
14953 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14955 if self.req_target_uuids:
14956 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14958 # Lock all groups used by instances optimistically; this requires going
14959 # via the node before it's locked, requiring verification later on
14960 lock_groups.update(group_uuid
14961 for instance_name in
14962 self.owned_locks(locking.LEVEL_INSTANCE)
14963 for group_uuid in
14964 self.cfg.GetInstanceNodeGroups(instance_name))
14965 else:
14966 # No target groups, need to lock all of them
14967 lock_groups = locking.ALL_SET
14969 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14971 elif level == locking.LEVEL_NODE:
14972 # This will only lock the nodes in the group to be evacuated which
14973 # contain actual instances
14974 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14975 self._LockInstancesNodes()
14977 # Lock all nodes in group to be evacuated and target groups
14978 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14979 assert self.group_uuid in owned_groups
14980 member_nodes = [node_name
14981 for group in owned_groups
14982 for node_name in self.cfg.GetNodeGroup(group).members]
14983 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14985 def CheckPrereq(self):
14986 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14987 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14988 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14990 assert owned_groups.issuperset(self.req_target_uuids)
14991 assert self.group_uuid in owned_groups
14993 # Check if locked instances are still correct
14994 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14996 # Get instance information
14997 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14999 # Check if node groups for locked instances are still correct
15000 _CheckInstancesNodeGroups(self.cfg, self.instances,
15001 owned_groups, owned_nodes, self.group_uuid)
15003 if self.req_target_uuids:
15004 # User requested specific target groups
15005 self.target_uuids = self.req_target_uuids
15007 # All groups except the one to be evacuated are potential targets
15008 self.target_uuids = [group_uuid for group_uuid in owned_groups
15009 if group_uuid != self.group_uuid]
15011 if not self.target_uuids:
15012 raise errors.OpPrereqError("There are no possible target groups",
15013 errors.ECODE_INVAL)
15015 def BuildHooksEnv(self):
15016 """Build hooks env.
15020 "GROUP_NAME": self.op.group_name,
15021 "TARGET_GROUPS": " ".join(self.target_uuids),
15024 def BuildHooksNodes(self):
15025 """Build hooks nodes.
15028 mn = self.cfg.GetMasterNode()
15030 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15032 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15034 return (run_nodes, run_nodes)
15036 def Exec(self, feedback_fn):
15037 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15039 assert self.group_uuid not in self.target_uuids
15041 req = iallocator.IAReqGroupChange(instances=instances,
15042 target_groups=self.target_uuids)
15043 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15045 ial.Run(self.op.iallocator)
15047 if not ial.success:
15048 raise errors.OpPrereqError("Can't compute group evacuation using"
15049 " iallocator '%s': %s" %
15050 (self.op.iallocator, ial.info),
15051 errors.ECODE_NORES)
15053 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15055 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15056 len(jobs), self.op.group_name)
15058 return ResultWithJobs(jobs)
15061 class TagsLU(NoHooksLU): # pylint: disable=W0223
15062 """Generic tags LU.
15064 This is an abstract class which is the parent of all the other tags LUs.
15067 def ExpandNames(self):
15068 self.group_uuid = None
15069 self.needed_locks = {}
15071 if self.op.kind == constants.TAG_NODE:
15072 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15073 lock_level = locking.LEVEL_NODE
15074 lock_name = self.op.name
15075 elif self.op.kind == constants.TAG_INSTANCE:
15076 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15077 lock_level = locking.LEVEL_INSTANCE
15078 lock_name = self.op.name
15079 elif self.op.kind == constants.TAG_NODEGROUP:
15080 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15081 lock_level = locking.LEVEL_NODEGROUP
15082 lock_name = self.group_uuid
15083 elif self.op.kind == constants.TAG_NETWORK:
15084 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15085 lock_level = locking.LEVEL_NETWORK
15086 lock_name = self.network_uuid
15091 if lock_level and getattr(self.op, "use_locking", True):
15092 self.needed_locks[lock_level] = lock_name
15094 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15095 # not possible to acquire the BGL based on opcode parameters)
15097 def CheckPrereq(self):
15098 """Check prerequisites.
15101 if self.op.kind == constants.TAG_CLUSTER:
15102 self.target = self.cfg.GetClusterInfo()
15103 elif self.op.kind == constants.TAG_NODE:
15104 self.target = self.cfg.GetNodeInfo(self.op.name)
15105 elif self.op.kind == constants.TAG_INSTANCE:
15106 self.target = self.cfg.GetInstanceInfo(self.op.name)
15107 elif self.op.kind == constants.TAG_NODEGROUP:
15108 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15109 elif self.op.kind == constants.TAG_NETWORK:
15110 self.target = self.cfg.GetNetwork(self.network_uuid)
15112 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15113 str(self.op.kind), errors.ECODE_INVAL)
15116 class LUTagsGet(TagsLU):
15117 """Returns the tags of a given object.
15122 def ExpandNames(self):
15123 TagsLU.ExpandNames(self)
15125 # Share locks as this is only a read operation
15126 self.share_locks = _ShareAll()
15128 def Exec(self, feedback_fn):
15129 """Returns the tag list.
15132 return list(self.target.GetTags())
15135 class LUTagsSearch(NoHooksLU):
15136 """Searches the tags for a given pattern.
15141 def ExpandNames(self):
15142 self.needed_locks = {}
15144 def CheckPrereq(self):
15145 """Check prerequisites.
15147 This checks the pattern passed for validity by compiling it.
15150 try:
15151 self.re = re.compile(self.op.pattern)
15152 except re.error, err:
15153 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15154 (self.op.pattern, err), errors.ECODE_INVAL)
15156 def Exec(self, feedback_fn):
15157 """Returns the tag list.
15161 tgts = [("/cluster", cfg.GetClusterInfo())]
15162 ilist = cfg.GetAllInstancesInfo().values()
15163 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15164 nlist = cfg.GetAllNodesInfo().values()
15165 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15166 tgts.extend(("/nodegroup/%s" % n.name, n)
15167 for n in cfg.GetAllNodeGroupsInfo().values())
15169 for path, target in tgts:
15170 for tag in target.GetTags():
15171 if self.re.search(tag):
15172 results.append((path, tag))
15176 class LUTagsSet(TagsLU):
15177 """Sets a tag on a given object.
15182 def CheckPrereq(self):
15183 """Check prerequisites.
15185 This checks the type and length of the tag name and value.
15188 TagsLU.CheckPrereq(self)
15189 for tag in self.op.tags:
15190 objects.TaggableObject.ValidateTag(tag)
15192 def Exec(self, feedback_fn):
15197 for tag in self.op.tags:
15198 self.target.AddTag(tag)
15199 except errors.TagError, err:
15200 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15201 self.cfg.Update(self.target, feedback_fn)
15204 class LUTagsDel(TagsLU):
15205 """Delete a list of tags from a given object.
15210 def CheckPrereq(self):
15211 """Check prerequisites.
15213 This checks that we have the given tag.
15216 TagsLU.CheckPrereq(self)
15217 for tag in self.op.tags:
15218 objects.TaggableObject.ValidateTag(tag)
15219 del_tags = frozenset(self.op.tags)
15220 cur_tags = self.target.GetTags()
15222 diff_tags = del_tags - cur_tags
15223 if diff_tags:
15224 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15225 raise errors.OpPrereqError("Tag(s) %s not found" %
15226 (utils.CommaJoin(diff_names), ),
15227 errors.ECODE_NOENT)
15229 def Exec(self, feedback_fn):
15230 """Remove the tag from the object.
15233 for tag in self.op.tags:
15234 self.target.RemoveTag(tag)
15235 self.cfg.Update(self.target, feedback_fn)
15238 class LUTestDelay(NoHooksLU):
15239 """Sleep for a specified amount of time.
15241 This LU sleeps on the master and/or nodes for a specified amount of
15242 time.
15247 def ExpandNames(self):
15248 """Expand names and set required locks.
15250 This expands the node list, if any.
15253 self.needed_locks = {}
15254 if self.op.on_nodes:
15255 # _GetWantedNodes can be used here, but is not always appropriate to use
15256 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15257 # more information.
15258 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15259 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15261 def _TestDelay(self):
15262 """Do the actual sleep.
15265 if self.op.on_master:
15266 if not utils.TestDelay(self.op.duration):
15267 raise errors.OpExecError("Error during master delay test")
15268 if self.op.on_nodes:
15269 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15270 for node, node_result in result.items():
15271 node_result.Raise("Failure during rpc call to node %s" % node)
15273 def Exec(self, feedback_fn):
15274 """Execute the test delay opcode, with the wanted repetitions.
15277 if self.op.repeat == 0:
15278 self._TestDelay()
15279 else:
15280 top_value = self.op.repeat - 1
15281 for i in range(self.op.repeat):
15282 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15283 self._TestDelay()
15286 class LURestrictedCommand(NoHooksLU):
15287 """Logical unit for executing restricted commands.
15292 def ExpandNames(self):
15294 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15296 self.needed_locks = {
15297 locking.LEVEL_NODE: self.op.nodes,
15299 self.share_locks = {
15300 locking.LEVEL_NODE: not self.op.use_locking,
15303 def CheckPrereq(self):
15304 """Check prerequisites.
15308 def Exec(self, feedback_fn):
15309 """Execute restricted command and return output.
15312 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15314 # Check if correct locks are held
15315 assert set(self.op.nodes).issubset(owned_nodes)
15317 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15319 result = []
15321 for node_name in self.op.nodes:
15322 nres = rpcres[node_name]
15323 if nres.fail_msg:
15324 msg = ("Command '%s' on node '%s' failed: %s" %
15325 (self.op.command, node_name, nres.fail_msg))
15326 result.append((False, msg))
15327 else:
15328 result.append((True, nres.payload))
15330 return result
15333 class LUTestJqueue(NoHooksLU):
15334 """Utility LU to test some aspects of the job queue.
15339 # Must be lower than default timeout for WaitForJobChange to see whether it
15340 # notices changed jobs
15341 _CLIENT_CONNECT_TIMEOUT = 20.0
15342 _CLIENT_CONFIRM_TIMEOUT = 60.0
15345 def _NotifyUsingSocket(cls, cb, errcls):
15346 """Opens a Unix socket and waits for another program to connect.
15349 @param cb: Callback to send socket name to client
15350 @type errcls: class
15351 @param errcls: Exception class to use for errors
15354 # Using a temporary directory as there's no easy way to create temporary
15355 # sockets without writing a custom loop around tempfile.mktemp and
15356 # socket.bind
15357 tmpdir = tempfile.mkdtemp()
15359 tmpsock = utils.PathJoin(tmpdir, "sock")
15361 logging.debug("Creating temporary socket at %s", tmpsock)
15362 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15367 # Send details to client
15370 # Wait for client to connect before continuing
15371 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15373 (conn, _) = sock.accept()
15374 except socket.error, err:
15375 raise errcls("Client didn't connect in time (%s)" % err)
15379 # Remove as soon as client is connected
15380 shutil.rmtree(tmpdir)
15382 # Wait for client to close
15385 # pylint: disable=E1101
15386 # Instance of '_socketobject' has no ... member
15387 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15388 conn.recv(1)
15389 except socket.error, err:
15390 raise errcls("Client failed to confirm notification (%s)" % err)
15394 def _SendNotification(self, test, arg, sockname):
15395 """Sends a notification to the client.
15398 @param test: Test name
15399 @param arg: Test argument (depends on test)
15400 @type sockname: string
15401 @param sockname: Socket path
15404 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15406 def _Notify(self, prereq, test, arg):
15407 """Notifies the client of a test.
15410 @param prereq: Whether this is a prereq-phase test
15412 @param test: Test name
15413 @param arg: Test argument (depends on test)
15417 errcls = errors.OpPrereqError
15419 errcls = errors.OpExecError
15421 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15425 def CheckArguments(self):
15426 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15427 self.expandnames_calls = 0
15429 def ExpandNames(self):
15430 checkargs_calls = getattr(self, "checkargs_calls", 0)
15431 if checkargs_calls < 1:
15432 raise errors.ProgrammerError("CheckArguments was not called")
15434 self.expandnames_calls += 1
15436 if self.op.notify_waitlock:
15437 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15439 self.LogInfo("Expanding names")
15441 # Get lock on master node (just to get a lock, not for a particular reason)
15442 self.needed_locks = {
15443 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15446 def Exec(self, feedback_fn):
15447 if self.expandnames_calls < 1:
15448 raise errors.ProgrammerError("ExpandNames was not called")
15450 if self.op.notify_exec:
15451 self._Notify(False, constants.JQT_EXEC, None)
15453 self.LogInfo("Executing")
15455 if self.op.log_messages:
15456 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15457 for idx, msg in enumerate(self.op.log_messages):
15458 self.LogInfo("Sending log message %s", idx + 1)
15459 feedback_fn(constants.JQT_MSGPREFIX + msg)
15460 # Report how many test messages have been sent
15461 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15464 raise errors.OpExecError("Opcode failure was requested")
15469 class LUTestAllocator(NoHooksLU):
15470 """Run allocator tests.
15472 This LU runs the allocator tests
15475 def CheckPrereq(self):
15476 """Check prerequisites.
15478 This checks the opcode parameters depending on the direction and mode of the test.
15481 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15482 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15483 for attr in ["memory", "disks", "disk_template",
15484 "os", "tags", "nics", "vcpus"]:
15485 if not hasattr(self.op, attr):
15486 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15487 attr, errors.ECODE_INVAL)
15488 iname = self.cfg.ExpandInstanceName(self.op.name)
15489 if iname is not None:
15490 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15491 iname, errors.ECODE_EXISTS)
15492 if not isinstance(self.op.nics, list):
15493 raise errors.OpPrereqError("Invalid parameter 'nics'",
15494 errors.ECODE_INVAL)
15495 if not isinstance(self.op.disks, list):
15496 raise errors.OpPrereqError("Invalid parameter 'disks'",
15497 errors.ECODE_INVAL)
15498 for row in self.op.disks:
15499 if (not isinstance(row, dict) or
15500 constants.IDISK_SIZE not in row or
15501 not isinstance(row[constants.IDISK_SIZE], int) or
15502 constants.IDISK_MODE not in row or
15503 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15504 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15505 " parameter", errors.ECODE_INVAL)
15506 if self.op.hypervisor is None:
15507 self.op.hypervisor = self.cfg.GetHypervisorType()
15508 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15509 fname = _ExpandInstanceName(self.cfg, self.op.name)
15510 self.op.name = fname
15511 self.relocate_from = \
15512 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15513 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15514 constants.IALLOCATOR_MODE_NODE_EVAC):
15515 if not self.op.instances:
15516 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15517 self.op.instances = _GetWantedInstances(self, self.op.instances)
15519 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15520 self.op.mode, errors.ECODE_INVAL)
15522 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15523 if self.op.allocator is None:
15524 raise errors.OpPrereqError("Missing allocator name",
15525 errors.ECODE_INVAL)
15526 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15527 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15528 self.op.direction, errors.ECODE_INVAL)
15530 def Exec(self, feedback_fn):
15531 """Run the allocator test.
15534 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15535 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15536 memory=self.op.memory,
15537 disks=self.op.disks,
15538 disk_template=self.op.disk_template,
15542 vcpus=self.op.vcpus,
15543 spindle_use=self.op.spindle_use,
15544 hypervisor=self.op.hypervisor)
15545 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15546 req = iallocator.IAReqRelocate(name=self.op.name,
15547 relocate_from=list(self.relocate_from))
15548 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15549 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15550 target_groups=self.op.target_groups)
15551 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15552 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15553 evac_mode=self.op.evac_mode)
15554 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15555 disk_template = self.op.disk_template
15556 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15557 memory=self.op.memory,
15558 disks=self.op.disks,
15559 disk_template=disk_template,
15563 vcpus=self.op.vcpus,
15564 spindle_use=self.op.spindle_use,
15565 hypervisor=self.op.hypervisor)
15566 for idx in range(self.op.count)]
15567 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15569 raise errors.ProgrammerError("Uncatched mode %s in"
15570 " LUTestAllocator.Exec", self.op.mode)
15572 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15573 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15574 result = ial.in_text
15575 else:
15576 ial.Run(self.op.allocator, validate=False)
15577 result = ial.out_text
15579 return result
15582 class LUNetworkAdd(LogicalUnit):
15583 """Logical unit for creating networks.
15586 HPATH = "network-add"
15587 HTYPE = constants.HTYPE_NETWORK
15590 def BuildHooksNodes(self):
15591 """Build hooks nodes.
15594 mn = self.cfg.GetMasterNode()
15595 return ([mn], [mn])
15597 def ExpandNames(self):
15598 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15599 self.needed_locks = {}
15600 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15602 def CheckPrereq(self):
15603 """Check prerequisites.
15605 This checks that the network is not already defined and that its parameters are valid.
15609 if self.op.network is None:
15610 raise errors.OpPrereqError("Network must be given",
15611 errors.ECODE_INVAL)
15613 uuid = self.cfg.LookupNetwork(self.op.network_name)
15615 if uuid:
15616 raise errors.OpPrereqError("Network '%s' already defined" %
15617 self.op.network, errors.ECODE_EXISTS)
15619 if self.op.mac_prefix:
15620 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15622 # Check tag validity
15623 for tag in self.op.tags:
15624 objects.TaggableObject.ValidateTag(tag)
15626 def BuildHooksEnv(self):
15627 """Build hooks env.
15631 "name": self.op.network_name,
15632 "subnet": self.op.network,
15633 "gateway": self.op.gateway,
15634 "network6": self.op.network6,
15635 "gateway6": self.op.gateway6,
15636 "mac_prefix": self.op.mac_prefix,
15637 "network_type": self.op.network_type,
15638 "tags": self.op.tags,
15640 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15642 def Exec(self, feedback_fn):
15643 """Add the ip pool to the cluster.
15646 nobj = objects.Network(name=self.op.network_name,
15647 network=self.op.network,
15648 gateway=self.op.gateway,
15649 network6=self.op.network6,
15650 gateway6=self.op.gateway6,
15651 mac_prefix=self.op.mac_prefix,
15652 network_type=self.op.network_type,
15653 uuid=self.network_uuid,
15655 # Initialize the associated address pool
15656 try:
15657 pool = network.AddressPool.InitializeNetwork(nobj)
15658 except errors.AddressPoolError, e:
15659 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15661 # Check if we need to reserve the nodes and the cluster master IP
15662 # These may not be allocated to any instances in routed mode, as
15663 # they wouldn't function anyway.
15664 for node in self.cfg.GetAllNodesInfo().values():
15665 for ip in [node.primary_ip, node.secondary_ip]:
15668 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15670 except errors.AddressPoolError:
15673 master_ip = self.cfg.GetClusterInfo().master_ip
15674 try:
15675 pool.Reserve(master_ip)
15676 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15677 except errors.AddressPoolError:
15678 pass
15680 if self.op.add_reserved_ips:
15681 for ip in self.op.add_reserved_ips:
15682 try:
15683 pool.Reserve(ip, external=True)
15684 except errors.AddressPoolError, e:
15685 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15688 for tag in self.op.tags:
15689 nobj.AddTag(tag)
15691 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15692 del self.remove_locks[locking.LEVEL_NETWORK]
15695 class LUNetworkRemove(LogicalUnit):
15696 HPATH = "network-remove"
15697 HTYPE = constants.HTYPE_NETWORK
15700 def ExpandNames(self):
15701 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15703 if not self.network_uuid:
15704 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15705 errors.ECODE_INVAL)
15706 self.needed_locks = {
15707 locking.LEVEL_NETWORK: [self.network_uuid],
15710 def CheckPrereq(self):
15711 """Check prerequisites.
15713 This checks that the given network name exists as a network and that it
15714 is not connected to (i.e. used by) any node group.
15719 # Verify that the network is not connected.
15720 node_groups = [group.name
15721 for group in self.cfg.GetAllNodeGroupsInfo().values()
15722 for net in group.networks.keys()
15723 if net == self.network_uuid]
15726 self.LogWarning("Nework '%s' is connected to the following"
15727 " node groups: %s" % (self.op.network_name,
15728 utils.CommaJoin(utils.NiceSort(node_groups))))
15729 raise errors.OpPrereqError("Network still connected",
15730 errors.ECODE_STATE)
15732 def BuildHooksEnv(self):
15733 """Build hooks env.
15737 "NETWORK_NAME": self.op.network_name,
15740 def BuildHooksNodes(self):
15741 """Build hooks nodes.
15744 mn = self.cfg.GetMasterNode()
15745 return ([mn], [mn])
15747 def Exec(self, feedback_fn):
15748 """Remove the network.
15752 self.cfg.RemoveNetwork(self.network_uuid)
15753 except errors.ConfigurationError:
15754 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15755 (self.op.network_name, self.network_uuid))
15758 class LUNetworkSetParams(LogicalUnit):
15759 """Modifies the parameters of a network.
15762 HPATH = "network-modify"
15763 HTYPE = constants.HTYPE_NETWORK
15766 def CheckArguments(self):
15767 if (self.op.gateway and
15768 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15769 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15770 " at once", errors.ECODE_INVAL)
15772 def ExpandNames(self):
15773 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15774 self.network = self.cfg.GetNetwork(self.network_uuid)
15775 if self.network is None:
15776 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15777 (self.op.network_name, self.network_uuid),
15778 errors.ECODE_INVAL)
15779 self.needed_locks = {
15780 locking.LEVEL_NETWORK: [self.network_uuid],
15783 def CheckPrereq(self):
15784 """Check prerequisites.
15787 self.gateway = self.network.gateway
15788 self.network_type = self.network.network_type
15789 self.mac_prefix = self.network.mac_prefix
15790 self.network6 = self.network.network6
15791 self.gateway6 = self.network.gateway6
15792 self.tags = self.network.tags
15794 self.pool = network.AddressPool(self.network)
15796 if self.op.gateway:
15797 if self.op.gateway == constants.VALUE_NONE:
15798 self.gateway = None
15800 self.gateway = self.op.gateway
15801 if self.pool.IsReserved(self.gateway):
15802 raise errors.OpPrereqError("%s is already reserved" %
15803 self.gateway, errors.ECODE_INVAL)
15805 if self.op.network_type:
15806 if self.op.network_type == constants.VALUE_NONE:
15807 self.network_type = None
15809 self.network_type = self.op.network_type
15811 if self.op.mac_prefix:
15812 if self.op.mac_prefix == constants.VALUE_NONE:
15813 self.mac_prefix = None
15815 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15816 self.mac_prefix = self.op.mac_prefix
15818 if self.op.gateway6:
15819 if self.op.gateway6 == constants.VALUE_NONE:
15820 self.gateway6 = None
15822 self.gateway6 = self.op.gateway6
15824 if self.op.network6:
15825 if self.op.network6 == constants.VALUE_NONE:
15826 self.network6 = None
15828 self.network6 = self.op.network6
15830 def BuildHooksEnv(self):
15831 """Build hooks env.
15835 "name": self.op.network_name,
15836 "subnet": self.network.network,
15837 "gateway": self.gateway,
15838 "network6": self.network6,
15839 "gateway6": self.gateway6,
15840 "mac_prefix": self.mac_prefix,
15841 "network_type": self.network_type,
15844 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15846 def BuildHooksNodes(self):
15847 """Build hooks nodes.
15850 mn = self.cfg.GetMasterNode()
15851 return ([mn], [mn])
15853 def Exec(self, feedback_fn):
15854 """Modifies the network.
15857 #TODO: reserve/release via temporary reservation manager
15858 # extend cfg.ReserveIp/ReleaseIp with the external flag
15859 if self.op.gateway:
15860 if self.gateway == self.network.gateway:
15861 self.LogWarning("Gateway is already %s" % self.gateway)
15864 self.pool.Reserve(self.gateway, external=True)
15865 if self.network.gateway:
15866 self.pool.Release(self.network.gateway, external=True)
15867 self.network.gateway = self.gateway
15869 if self.op.add_reserved_ips:
15870 for ip in self.op.add_reserved_ips:
15872 if self.pool.IsReserved(ip):
15873 self.LogWarning("IP %s is already reserved" % ip)
15875 self.pool.Reserve(ip, external=True)
15876 except errors.AddressPoolError, e:
15877 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15879 if self.op.remove_reserved_ips:
15880 for ip in self.op.remove_reserved_ips:
15881 if ip == self.network.gateway:
15882 self.LogWarning("Cannot unreserve Gateway's IP")
15885 if not self.pool.IsReserved(ip):
15886 self.LogWarning("IP %s is already unreserved" % ip)
15888 self.pool.Release(ip, external=True)
15889 except errors.AddressPoolError, e:
15890 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15892 if self.op.mac_prefix:
15893 self.network.mac_prefix = self.mac_prefix
15895 if self.op.network6:
15896 self.network.network6 = self.network6
15898 if self.op.gateway6:
15899 self.network.gateway6 = self.gateway6
15901 if self.op.network_type:
15902 self.network.network_type = self.network_type
15904 self.pool.Validate()
15906 self.cfg.Update(self.network, feedback_fn)
15909 class _NetworkQuery(_QueryBase):
15910 FIELDS = query.NETWORK_FIELDS
15912 def ExpandNames(self, lu):
15913 lu.needed_locks = {}
15915 self._all_networks = lu.cfg.GetAllNetworksInfo()
15916 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15918 if not self.names:
15919 self.wanted = [name_to_uuid[name]
15920 for name in utils.NiceSort(name_to_uuid.keys())]
15921 else:
15922 # Accept names to be either names or UUIDs.
15923 missing = []
15924 self.wanted = []
15925 all_uuid = frozenset(self._all_networks.keys())
15927 for name in self.names:
15928 if name in all_uuid:
15929 self.wanted.append(name)
15930 elif name in name_to_uuid:
15931 self.wanted.append(name_to_uuid[name])
15932 else:
15933 missing.append(name)
15935 if missing:
15936 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15937 errors.ECODE_NOENT)
15939 def DeclareLocks(self, lu, level):
15942 def _GetQueryData(self, lu):
15943 """Computes the list of networks and their attributes.
15946 do_instances = query.NETQ_INST in self.requested_data
15947 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15948 do_stats = query.NETQ_STATS in self.requested_data
15950 network_to_groups = None
15951 network_to_instances = None
15954 # For NETQ_GROUP, we need to map network->[groups]
15956 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15957 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15960 all_instances = lu.cfg.GetAllInstancesInfo()
15961 all_nodes = lu.cfg.GetAllNodesInfo()
15962 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15964 for group in all_groups.values():
15966 group_nodes = [node.name for node in all_nodes.values() if
15967 node.group == group.uuid]
15968 group_instances = [instance for instance in all_instances.values()
15969 if instance.primary_node in group_nodes]
15971 for net_uuid in group.networks.keys():
15972 if net_uuid in network_to_groups:
15973 netparams = group.networks[net_uuid]
15974 mode = netparams[constants.NIC_MODE]
15975 link = netparams[constants.NIC_LINK]
15976 info = group.name + '(' + mode + ', ' + link + ')'
15977 network_to_groups[net_uuid].append(info)
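# Editor's note: the label built above looks like, e.g.,
# "group1(bridged, xen-br0)" (hypothetical group and link names).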
15980 for instance in group_instances:
15981 for nic in instance.nics:
15982 if nic.network == self._all_networks[net_uuid].name:
15983 network_to_instances[net_uuid].append(instance.name)
15988 for uuid, net in self._all_networks.items():
15989 if uuid in self.wanted:
15990 pool = network.AddressPool(net)
15992 "free_count": pool.GetFreeCount(),
15993 "reserved_count": pool.GetReservedCount(),
15994 "map": pool.GetMap(),
15995 "external_reservations": ", ".join(pool.GetExternalReservations()),
15998 return query.NetworkQueryData([self._all_networks[uuid]
15999 for uuid in self.wanted],
16001 network_to_instances,
16005 class LUNetworkQuery(NoHooksLU):
16006 """Logical unit for querying networks.
16011 def CheckArguments(self):
16012 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16013 self.op.output_fields, False)
16015 def ExpandNames(self):
16016 self.nq.ExpandNames(self)
16018 def Exec(self, feedback_fn):
16019 return self.nq.OldStyleQuery(self)
16022 class LUNetworkConnect(LogicalUnit):
16023 """Connect a network to a nodegroup
16026 HPATH = "network-connect"
16027 HTYPE = constants.HTYPE_NETWORK
16030 def ExpandNames(self):
16031 self.network_name = self.op.network_name
16032 self.group_name = self.op.group_name
16033 self.network_mode = self.op.network_mode
16034 self.network_link = self.op.network_link
16036 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16037 self.network = self.cfg.GetNetwork(self.network_uuid)
16038 if self.network is None:
16039 raise errors.OpPrereqError("Network %s does not exist" %
16040 self.network_name, errors.ECODE_INVAL)
16042 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16043 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16044 if self.group is None:
16045 raise errors.OpPrereqError("Group %s does not exist" %
16046 self.group_name, errors.ECODE_INVAL)
16048 self.needed_locks = {
16049 locking.LEVEL_INSTANCE: [],
16050 locking.LEVEL_NODEGROUP: [self.group_uuid],
16052 self.share_locks[locking.LEVEL_INSTANCE] = 1
16054 def DeclareLocks(self, level):
16055 if level == locking.LEVEL_INSTANCE:
16056 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16058 # Lock instances optimistically, needs verification once group lock has
16060 self.needed_locks[locking.LEVEL_INSTANCE] = \
16061 self.cfg.GetNodeGroupInstances(self.group_uuid)
16063 def BuildHooksEnv(self):
16065 ret["GROUP_NAME"] = self.group_name
16066 ret["GROUP_NETWORK_MODE"] = self.network_mode
16067 ret["GROUP_NETWORK_LINK"] = self.network_link
16068 ret.update(_BuildNetworkHookEnvByObject(self.network))
16071 def BuildHooksNodes(self):
16072 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16073 return (nodes, nodes)
16075 def CheckPrereq(self):
16076 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16079 self.netparams = dict()
16080 self.netparams[constants.NIC_MODE] = self.network_mode
16081 self.netparams[constants.NIC_LINK] = self.network_link
16082 objects.NIC.CheckParameterSyntax(self.netparams)
16084 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16085 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16086 self.connected = False
16087 if self.network_uuid in self.group.networks:
16088 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16089 (self.network_name, self.group.name))
16090 self.connected = True
16093 pool = network.AddressPool(self.network)
16094 if self.op.conflicts_check:
16095 groupinstances = []
16096 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16097 groupinstances.append(self.cfg.GetInstanceInfo(n))
16098 instances = [(instance.name, idx, nic.ip)
16099 for instance in groupinstances
16100 for idx, nic in enumerate(instance.nics)
16101 if (not nic.network and pool.Contains(nic.ip))]
16103 self.LogWarning("Following occurences use IPs from network %s"
16104 " that is about to connect to nodegroup %s: %s" %
16105 (self.network_name, self.group.name,
16107 raise errors.OpPrereqError("Conflicting IPs found."
16108 " Please remove/modify"
16109 " corresponding NICs",
16110 errors.ECODE_INVAL)
16112 def Exec(self, feedback_fn):
16116 self.group.networks[self.network_uuid] = self.netparams
16117 self.cfg.Update(self.group, feedback_fn)
16120 class LUNetworkDisconnect(LogicalUnit):
16121 """Disconnect a network to a nodegroup
16124 HPATH = "network-disconnect"
16125 HTYPE = constants.HTYPE_NETWORK
16128 def ExpandNames(self):
16129 self.network_name = self.op.network_name
16130 self.group_name = self.op.group_name
16132 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16133 self.network = self.cfg.GetNetwork(self.network_uuid)
16134 if self.network is None:
16135 raise errors.OpPrereqError("Network %s does not exist" %
16136 self.network_name, errors.ECODE_INVAL)
16138 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16139 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16140 if self.group is None:
16141 raise errors.OpPrereqError("Group %s does not exist" %
16142 self.group_name, errors.ECODE_INVAL)
16144 self.needed_locks = {
16145 locking.LEVEL_INSTANCE: [],
16146 locking.LEVEL_NODEGROUP: [self.group_uuid],
16148 self.share_locks[locking.LEVEL_INSTANCE] = 1
16150 def DeclareLocks(self, level):
16151 if level == locking.LEVEL_INSTANCE:
16152 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16154 # Lock instances optimistically, needs verification once group lock has
16156 self.needed_locks[locking.LEVEL_INSTANCE] = \
16157 self.cfg.GetNodeGroupInstances(self.group_uuid)
16159 def BuildHooksEnv(self):
16161 ret["GROUP_NAME"] = self.group_name
16162 ret.update(_BuildNetworkHookEnvByObject(self.network))
16165 def BuildHooksNodes(self):
16166 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16167 return (nodes, nodes)
16169 def CheckPrereq(self):
16170 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16173 self.connected = True
16174 if self.network_uuid not in self.group.networks:
16175 self.LogWarning("Network '%s' is"
16176 " not mapped to group '%s'" %
16177 (self.network_name, self.group.name))
16178 self.connected = False
16181 if self.op.conflicts_check:
16182 groupinstances = []
16183 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16184 groupinstances.append(self.cfg.GetInstanceInfo(n))
16185 instances = [(instance.name, idx, nic.ip)
16186 for instance in groupinstances
16187 for idx, nic in enumerate(instance.nics)
16188 if nic.network == self.network_name]
16190 self.LogWarning("Following occurences use IPs from network %s"
16191 " that is about to disconnected from the nodegroup"
16193 (self.network_name, self.group.name,
16195 raise errors.OpPrereqError("Conflicting IPs."
16196 " Please remove/modify"
16197 " corresponding NICS",
16198 errors.ECODE_INVAL)
16200 def Exec(self, feedback_fn):
16201 if not self.connected:
16204 del self.group.networks[self.network_uuid]
16205 self.cfg.Update(self.group, feedback_fn)
16208 #: Query type implementations
16210 constants.QR_CLUSTER: _ClusterQuery,
16211 constants.QR_INSTANCE: _InstanceQuery,
16212 constants.QR_NODE: _NodeQuery,
16213 constants.QR_GROUP: _GroupQuery,
16214 constants.QR_NETWORK: _NetworkQuery,
16215 constants.QR_OS: _OsQuery,
16216 constants.QR_EXPORT: _ExportQuery,
16219 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16222 def _GetQueryImplementation(name):
16223 """Returns the implemtnation for a query type.
16225 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16229 return _QUERY_IMPL[name]
16231 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16232 errors.ECODE_INVAL)
16235 def _CheckForConflictingIp(lu, ip, node):
16236 """In case of conflicting ip raise error.
16239 @param ip: ip address
16241 @param node: node name
16244 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16245 if conf_net is not None:
16246 raise errors.OpPrereqError("Conflicting IP found:"
16247 " %s <> %s." % (ip, conf_net),
16248 errors.ECODE_INVAL)
16250 return (None, None)
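# Illustrative usage sketch (editor's addition; the calling context is an
# assumption, not shown here): instance NIC checks elsewhere in this module
# can call the helper roughly as
#   _CheckForConflictingIp(self, nic_ip, instance.primary_node)
# which raises OpPrereqError when nic_ip falls inside a network that is
# already connected to that node's group.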